Compare commits
260 Commits
perf/cache
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
581f78d276 | ||
|
|
57ebebc725 | ||
|
|
bcd1e633e6 | ||
|
|
56d956d723 | ||
|
|
6e8a9d72ad | ||
|
|
ad120cdf70 | ||
|
|
43873c1005 | ||
|
|
adcccc9b56 | ||
|
|
b0ba9affa2 | ||
|
|
568cdeb058 | ||
|
|
e832229a9e | ||
|
|
2154e97afc | ||
|
|
7a6a8a5bbb | ||
|
|
b5d78c1b1c | ||
|
|
95e2ee7b4a | ||
|
|
2fcfe76abc | ||
|
|
584a19fadd | ||
|
|
4237065ed0 | ||
|
|
b356697e02 | ||
|
|
a957861077 | ||
|
|
32830a0527 | ||
|
|
82408b8d7f | ||
|
|
e75d9bc1d9 | ||
|
|
e743469e2e | ||
|
|
324669d8e4 | ||
|
|
0b66d5a10c | ||
|
|
3b5c60e93a | ||
|
|
1615554ba3 | ||
|
|
a1ef5e67f3 | ||
|
|
f0704907f8 | ||
|
|
bf62a3d57e | ||
|
|
fb1b766f73 | ||
|
|
f2fa038bd0 | ||
|
|
50e1a08f66 | ||
|
|
c9b403c2be | ||
|
|
871b959ed9 | ||
|
|
49661864b1 | ||
|
|
a794d5ffb2 | ||
|
|
5be08f4bc8 | ||
|
|
24dfd6aebe | ||
|
|
e0c40985a7 | ||
|
|
758c881f9d | ||
|
|
1773cc7ef1 | ||
|
|
72ffc78b62 | ||
|
|
61ef7d14c0 | ||
|
|
dc640eef32 | ||
|
|
4207163091 | ||
|
|
70b8ee6fb8 | ||
|
|
3b8793db37 | ||
|
|
1433349d10 | ||
|
|
29510a16eb | ||
|
|
424f69083b | ||
|
|
5ef3707509 | ||
|
|
9fa43f3477 | ||
|
|
c7630fa008 | ||
|
|
e35da4b6a4 | ||
|
|
e217438026 | ||
|
|
aa093339f7 | ||
|
|
6cfb8af383 | ||
|
|
30d02486ac | ||
|
|
209bf6fccc | ||
|
|
e63228e87e | ||
|
|
f7800d87fe | ||
|
|
ced450f53c | ||
|
|
353a4b9a53 | ||
|
|
86dad64f11 | ||
|
|
a3d884f4c4 | ||
|
|
c64484e392 | ||
|
|
124f488d3f | ||
|
|
811b848514 | ||
|
|
2bcfaf17d5 | ||
|
|
a38674e6aa | ||
|
|
65542764f2 | ||
|
|
64a5221e99 | ||
|
|
6b880ccf96 | ||
|
|
dd51149a5e | ||
|
|
16d3003356 | ||
|
|
62d7e0c28b | ||
|
|
4184e12751 | ||
|
|
ec4ce4aafc | ||
|
|
582cec5557 | ||
|
|
51c14f38d1 | ||
|
|
65339ab658 | ||
|
|
8bba19ca9d | ||
|
|
cd67ae8485 | ||
|
|
0ca2014244 | ||
|
|
18b43a50e8 | ||
|
|
7fa9aef4d1 | ||
|
|
9a4fe87642 | ||
|
|
44262559b9 | ||
|
|
9bce622daa | ||
|
|
4be273c9eb | ||
|
|
4ccf2ad923 | ||
|
|
ef10882fff | ||
|
|
966422d8b6 | ||
|
|
fafe76e660 | ||
|
|
dfb1e6a04d | ||
|
|
38f9042654 | ||
|
|
bed283a579 | ||
|
|
10847ebd35 | ||
|
|
6cef6d7ee2 | ||
|
|
1e0ced8c58 | ||
|
|
0dc9be4b73 | ||
|
|
25c364da11 | ||
|
|
233034fcbf | ||
|
|
9537c44056 | ||
|
|
87e0a85c79 | ||
|
|
6e636b2a6b | ||
|
|
d7d9670a3c | ||
|
|
082f630fd9 | ||
|
|
b0c604cbf7 | ||
|
|
c6dbe6d3cc | ||
|
|
e8df736a72 | ||
|
|
769cb31e84 | ||
|
|
2c4020f107 | ||
|
|
d8f7dfb37d | ||
|
|
17f377e04c | ||
|
|
48387bd1bd | ||
|
|
8ee30a0a2f | ||
|
|
c298507c31 | ||
|
|
35e04bca75 | ||
|
|
7f558e1e1b | ||
|
|
0ea647ea9e | ||
|
|
e7208f3545 | ||
|
|
e885802052 | ||
|
|
2758faca3b | ||
|
|
ed218e9607 | ||
|
|
9e319da08d | ||
|
|
08834cb0e3 | ||
|
|
ccf5f70bdf | ||
|
|
bd8b1c5892 | ||
|
|
df777cc7b1 | ||
|
|
7dcad4f8d3 | ||
|
|
5dc1544142 | ||
|
|
f762c5a609 | ||
|
|
7a41446c38 | ||
|
|
df70014092 | ||
|
|
ad323b86c8 | ||
|
|
6bcf34fa0f | ||
|
|
2d8cfdbf1d | ||
|
|
ff923514af | ||
|
|
17991ac9ae | ||
|
|
9795fa56d2 | ||
|
|
ae8da1f471 | ||
|
|
b2cb83b47e | ||
|
|
29a60f4414 | ||
|
|
f3bbe577bd | ||
|
|
263058ed33 | ||
|
|
a40fcb6757 | ||
|
|
bd0121478f | ||
|
|
1c0f497e28 | ||
|
|
a365839494 | ||
|
|
652f552f0d | ||
|
|
521cf14b92 | ||
|
|
d95dd03cc2 | ||
|
|
99f803e5ba | ||
|
|
f121395b06 | ||
|
|
ea6cf23add | ||
|
|
dba534250c | ||
|
|
ff7ca9e0d3 | ||
|
|
c7d6a16ab4 | ||
|
|
cb631b62c2 | ||
|
|
24a4c15ca9 | ||
|
|
902e1ee2b6 | ||
|
|
0bfa2efa4f | ||
|
|
77de937908 | ||
|
|
72dbd3d5a1 | ||
|
|
9914c684de | ||
|
|
95c10a4369 | ||
|
|
395b090888 | ||
|
|
a53d31016f | ||
|
|
0bd56ffb9a | ||
|
|
435b47fddc | ||
|
|
00670860ed | ||
|
|
645856d1d6 | ||
|
|
c84bff3a5f | ||
|
|
864154bc2d | ||
|
|
d83e81d451 | ||
|
|
c1296be7fd | ||
|
|
8283eaa794 | ||
|
|
12eb2a4d7d | ||
|
|
5a97268ec0 | ||
|
|
c1e8955841 | ||
|
|
0af3f95911 | ||
|
|
9f1ef7bc16 | ||
|
|
022e5eff4f | ||
|
|
89322f294e | ||
|
|
76fb1dc9e3 | ||
|
|
2c29677dc3 | ||
|
|
1c251dd9c7 | ||
|
|
7454ffb09a | ||
|
|
1960a436e0 | ||
|
|
5ecaa8427b | ||
|
|
f235f3f999 | ||
|
|
9134be49e1 | ||
|
|
0852ffee2e | ||
|
|
fd384574f5 | ||
|
|
e9f18f78c9 | ||
|
|
eeac8d5c7c | ||
|
|
c9255f9d94 | ||
|
|
5dade750e3 | ||
|
|
5ee75410d8 | ||
|
|
897a743b13 | ||
|
|
cf027fb269 | ||
|
|
fbc193b22f | ||
|
|
139ad8e09e | ||
|
|
e772b5e6e9 | ||
|
|
e46dfe7830 | ||
|
|
be94c4bdfe | ||
|
|
b1523956fe | ||
|
|
7a95b190f1 | ||
|
|
9a35c78715 | ||
|
|
c852c72e13 | ||
|
|
2d98d4fd1c | ||
|
|
87e5cd8c75 | ||
|
|
8944d59744 | ||
|
|
674b0e5a83 | ||
|
|
dfec59dd61 | ||
|
|
5e39f26be3 | ||
|
|
1b46b755cc | ||
|
|
5b57ebdb72 | ||
|
|
5c76d47844 | ||
|
|
2be59b920e | ||
|
|
b202642137 | ||
|
|
dbfe57d8cf | ||
|
|
4c3466ac6c | ||
|
|
69332397a2 | ||
|
|
bd0a7b0d67 | ||
|
|
31b54af3b8 | ||
|
|
b7d15591cf | ||
|
|
55960c88b5 | ||
|
|
4ffd028a02 | ||
|
|
0be021398c | ||
|
|
84425718aa | ||
|
|
7b212f3824 | ||
|
|
344461d30d | ||
|
|
1d836390e9 | ||
|
|
4e7225ba8c | ||
|
|
37a5f9453d | ||
|
|
cfe9e1f75b | ||
|
|
7aba505320 | ||
|
|
cca7493dce | ||
|
|
10804c6305 | ||
|
|
52aa813129 | ||
|
|
9087ed21eb | ||
|
|
180acbe7c1 | ||
|
|
b511e509e0 | ||
|
|
c39293cbaa | ||
|
|
178b0054a7 | ||
|
|
f086473c40 | ||
|
|
a9531e751b | ||
|
|
937c6cab5d | ||
|
|
52597f8dca | ||
|
|
b21ca94fb1 | ||
|
|
dd487ab8a4 | ||
|
|
cf87a692d0 | ||
|
|
519374c306 | ||
|
|
de515926e1 | ||
|
|
536d436989 | ||
|
|
a9de0c77a5 |
134
.editorconfig
Normal file
134
.editorconfig
Normal file
@ -0,0 +1,134 @@
|
||||
# EditorConfig: https://editorconfig.org
|
||||
root = true
|
||||
|
||||
# All files
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = crlf
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
# Markdown — preserve trailing spaces (two spaces = <br>)
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
# YAML / GitHub Actions
|
||||
[*.{yml,yaml}]
|
||||
indent_size = 2
|
||||
|
||||
# JSON
|
||||
[*.json]
|
||||
indent_size = 2
|
||||
|
||||
# XML project files and manifests
|
||||
[*.{csproj,props,targets,appxmanifest,manifest,resx,xaml}]
|
||||
indent_size = 2
|
||||
|
||||
# ============================================================
|
||||
# C# code style
|
||||
# ============================================================
|
||||
[*.cs]
|
||||
|
||||
# Namespace declarations — file-scoped (matches codebase convention)
|
||||
csharp_style_namespace_declarations = file_scoped:suggestion
|
||||
|
||||
# 'var' preferences
|
||||
csharp_style_var_for_built_in_types = false:suggestion
|
||||
csharp_style_var_when_type_is_apparent = true:suggestion
|
||||
csharp_style_var_elsewhere = false:suggestion
|
||||
|
||||
# Expression-bodied members — allow but don't require
|
||||
csharp_style_expression_bodied_methods = when_on_single_line:silent
|
||||
csharp_style_expression_bodied_properties = when_on_single_line:silent
|
||||
csharp_style_expression_bodied_constructors = false:silent
|
||||
|
||||
# Pattern matching
|
||||
csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
|
||||
csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
|
||||
csharp_style_prefer_pattern_matching = true:suggestion
|
||||
csharp_style_prefer_switch_expression = true:suggestion
|
||||
csharp_style_prefer_not_pattern = true:suggestion
|
||||
|
||||
# Modern C# index and range operators
|
||||
csharp_style_prefer_range_operator = true:suggestion
|
||||
csharp_style_prefer_index_operator = true:suggestion
|
||||
|
||||
# Simplified using statements (using var x = ...; without braces)
|
||||
csharp_prefer_simple_using_statement = true:suggestion
|
||||
|
||||
# Null checks
|
||||
csharp_style_conditional_delegate_call = true:suggestion
|
||||
dotnet_style_null_propagation = true:suggestion
|
||||
dotnet_style_coalesce_expression = true:suggestion
|
||||
|
||||
# Prefer 'is null' over 'object.ReferenceEquals(x, null)' and '(object)x == null'
|
||||
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
|
||||
|
||||
# Prefer readonly fields where possible
|
||||
dotnet_style_readonly_field = true:suggestion
|
||||
|
||||
# Brace placement — Allman style (matches codebase)
|
||||
csharp_new_line_before_open_brace = all
|
||||
csharp_new_line_before_else = true
|
||||
csharp_new_line_before_catch = true
|
||||
csharp_new_line_before_finally = true
|
||||
|
||||
# Indentation
|
||||
csharp_indent_case_contents = true
|
||||
csharp_indent_switch_labels = true
|
||||
|
||||
# Spacing
|
||||
csharp_space_after_cast = false
|
||||
csharp_space_after_keywords_in_control_flow_statements = true
|
||||
csharp_space_between_method_declaration_parameter_list_parentheses = false
|
||||
csharp_space_between_method_call_parameter_list_parentheses = false
|
||||
|
||||
# 'this.' qualification — not required
|
||||
dotnet_style_qualification_for_field = false:suggestion
|
||||
dotnet_style_qualification_for_property = false:suggestion
|
||||
dotnet_style_qualification_for_method = false:suggestion
|
||||
dotnet_style_qualification_for_event = false:suggestion
|
||||
|
||||
# Language keywords over type names (e.g. 'int' over 'Int32')
|
||||
dotnet_style_predefined_type_for_locals_parameters_members = true:suggestion
|
||||
dotnet_style_predefined_type_for_member_access = true:suggestion
|
||||
|
||||
# Object/collection initializers
|
||||
dotnet_style_object_initializer = true:suggestion
|
||||
dotnet_style_collection_initializer = true:suggestion
|
||||
|
||||
# Tuple names
|
||||
dotnet_style_explicit_tuple_names = true:suggestion
|
||||
|
||||
# Prefer simplified boolean expressions (e.g. 'x' over 'x == true')
|
||||
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
|
||||
|
||||
# Prefer inferred tuple/anonymous type member names
|
||||
dotnet_style_prefer_inferred_tuple_names = true:suggestion
|
||||
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
|
||||
|
||||
# Naming conventions — private instance fields: _camelCase
|
||||
dotnet_naming_rule.private_fields_should_be_camel_case.severity = suggestion
|
||||
dotnet_naming_rule.private_fields_should_be_camel_case.symbols = private_instance_fields
|
||||
dotnet_naming_rule.private_fields_should_be_camel_case.style = underscore_camel_case
|
||||
|
||||
dotnet_naming_symbols.private_instance_fields.applicable_kinds = field
|
||||
dotnet_naming_symbols.private_instance_fields.applicable_accessibilities = private
|
||||
dotnet_naming_symbols.private_instance_fields.required_modifiers =
|
||||
|
||||
dotnet_naming_style.underscore_camel_case.capitalization = camel_case
|
||||
dotnet_naming_style.underscore_camel_case.required_prefix = _
|
||||
|
||||
# Private static readonly fields — s_camelCase preferred; severity is silent because PascalCase is also used in codebase
|
||||
dotnet_naming_rule.static_readonly_fields.severity = silent
|
||||
dotnet_naming_rule.static_readonly_fields.symbols = static_readonly_fields
|
||||
dotnet_naming_rule.static_readonly_fields.style = s_camel_case
|
||||
|
||||
dotnet_naming_symbols.static_readonly_fields.applicable_kinds = field
|
||||
dotnet_naming_symbols.static_readonly_fields.applicable_accessibilities = private
|
||||
dotnet_naming_symbols.static_readonly_fields.required_modifiers = static, readonly
|
||||
|
||||
dotnet_naming_style.s_camel_case.capitalization = camel_case
|
||||
dotnet_naming_style.s_camel_case.required_prefix = s_
|
||||
178
.github/agents/agentic-workflows.agent.md
vendored
Normal file
178
.github/agents/agentic-workflows.agent.md
vendored
Normal file
@ -0,0 +1,178 @@
|
||||
---
|
||||
description: GitHub Agentic Workflows (gh-aw) - Create, debug, and upgrade AI-powered workflows with intelligent prompt routing
|
||||
disable-model-invocation: true
|
||||
---
|
||||
|
||||
# GitHub Agentic Workflows Agent
|
||||
|
||||
This agent helps you work with **GitHub Agentic Workflows (gh-aw)**, a CLI extension for creating AI-powered workflows in natural language using markdown files.
|
||||
|
||||
## What This Agent Does
|
||||
|
||||
This is a **dispatcher agent** that routes your request to the appropriate specialized prompt based on your task:
|
||||
|
||||
- **Creating new workflows**: Routes to `create` prompt
|
||||
- **Updating existing workflows**: Routes to `update` prompt
|
||||
- **Debugging workflows**: Routes to `debug` prompt
|
||||
- **Upgrading workflows**: Routes to `upgrade-agentic-workflows` prompt
|
||||
- **Creating report-generating workflows**: Routes to `report` prompt — consult this whenever the workflow posts status updates, audits, analyses, or any structured output as issues, discussions, or comments
|
||||
- **Creating shared components**: Routes to `create-shared-agentic-workflow` prompt
|
||||
- **Fixing Dependabot PRs**: Routes to `dependabot` prompt — use this when Dependabot opens PRs that modify generated manifest files (`.github/workflows/package.json`, `.github/workflows/requirements.txt`, `.github/workflows/go.mod`). Never merge those PRs directly; instead update the source `.md` files and rerun `gh aw compile --dependabot` to bundle all fixes
|
||||
- **Analyzing test coverage**: Routes to `test-coverage` prompt — consult this whenever the workflow reads, analyzes, or reports on test coverage data from PRs or CI runs
|
||||
|
||||
Workflows may optionally include:
|
||||
|
||||
- **Project tracking / monitoring** (GitHub Projects updates, status reporting)
|
||||
- **Orchestration / coordination** (one workflow assigning agents or dispatching and coordinating other workflows)
|
||||
|
||||
## Files This Applies To
|
||||
|
||||
- Workflow files: `.github/workflows/*.md` and `.github/workflows/**/*.md`
|
||||
- Workflow lock files: `.github/workflows/*.lock.yml`
|
||||
- Shared components: `.github/workflows/shared/*.md`
|
||||
- Configuration: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/github-agentic-workflows.md
|
||||
|
||||
## Problems This Solves
|
||||
|
||||
- **Workflow Creation**: Design secure, validated agentic workflows with proper triggers, tools, and permissions
|
||||
- **Workflow Debugging**: Analyze logs, identify missing tools, investigate failures, and fix configuration issues
|
||||
- **Version Upgrades**: Migrate workflows to new gh-aw versions, apply codemods, fix breaking changes
|
||||
- **Component Design**: Create reusable shared workflow components that wrap MCP servers
|
||||
|
||||
## How to Use
|
||||
|
||||
When you interact with this agent, it will:
|
||||
|
||||
1. **Understand your intent** - Determine what kind of task you're trying to accomplish
|
||||
2. **Route to the right prompt** - Load the specialized prompt file for your task
|
||||
3. **Execute the task** - Follow the detailed instructions in the loaded prompt
|
||||
|
||||
## Available Prompts
|
||||
|
||||
### Create New Workflow
|
||||
**Load when**: User wants to create a new workflow from scratch, add automation, or design a workflow that doesn't exist yet
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/create-agentic-workflow.md
|
||||
|
||||
**Use cases**:
|
||||
- "Create a workflow that triages issues"
|
||||
- "I need a workflow to label pull requests"
|
||||
- "Design a weekly research automation"
|
||||
|
||||
### Update Existing Workflow
|
||||
**Load when**: User wants to modify, improve, or refactor an existing workflow
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/update-agentic-workflow.md
|
||||
|
||||
**Use cases**:
|
||||
- "Add web-fetch tool to the issue-classifier workflow"
|
||||
- "Update the PR reviewer to use discussions instead of issues"
|
||||
- "Improve the prompt for the weekly-research workflow"
|
||||
|
||||
### Debug Workflow
|
||||
**Load when**: User needs to investigate, audit, debug, or understand a workflow, troubleshoot issues, analyze logs, or fix errors
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/debug-agentic-workflow.md
|
||||
|
||||
**Use cases**:
|
||||
- "Why is this workflow failing?"
|
||||
- "Analyze the logs for workflow X"
|
||||
- "Investigate missing tool calls in run #12345"
|
||||
|
||||
### Upgrade Agentic Workflows
|
||||
**Load when**: User wants to upgrade workflows to a new gh-aw version or fix deprecations
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/upgrade-agentic-workflows.md
|
||||
|
||||
**Use cases**:
|
||||
- "Upgrade all workflows to the latest version"
|
||||
- "Fix deprecated fields in workflows"
|
||||
- "Apply breaking changes from the new release"
|
||||
|
||||
### Create a Report-Generating Workflow
|
||||
**Load when**: The workflow being created or updated produces reports — recurring status updates, audit summaries, analyses, or any structured output posted as a GitHub issue, discussion, or comment
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/report.md
|
||||
|
||||
**Use cases**:
|
||||
- "Create a weekly CI health report"
|
||||
- "Post a daily security audit to Discussions"
|
||||
- "Add a status update comment to open PRs"
|
||||
|
||||
### Create Shared Agentic Workflow
|
||||
**Load when**: User wants to create a reusable workflow component or wrap an MCP server
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/create-shared-agentic-workflow.md
|
||||
|
||||
**Use cases**:
|
||||
- "Create a shared component for Notion integration"
|
||||
- "Wrap the Slack MCP server as a reusable component"
|
||||
- "Design a shared workflow for database queries"
|
||||
|
||||
### Fix Dependabot PRs
|
||||
**Load when**: User needs to close or fix open Dependabot PRs that update dependencies in generated manifest files (`.github/workflows/package.json`, `.github/workflows/requirements.txt`, `.github/workflows/go.mod`)
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/dependabot.md
|
||||
|
||||
**Use cases**:
|
||||
- "Fix the open Dependabot PRs for npm dependencies"
|
||||
- "Bundle and close the Dependabot PRs for workflow dependencies"
|
||||
- "Update @playwright/test to fix the Dependabot PR"
|
||||
|
||||
### Analyze Test Coverage
|
||||
**Load when**: The workflow reads, analyzes, or reports test coverage — whether triggered by a PR, a schedule, or a slash command. Always consult this prompt before designing the coverage data strategy.
|
||||
|
||||
**Prompt file**: https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/test-coverage.md
|
||||
|
||||
**Use cases**:
|
||||
- "Create a workflow that comments coverage on PRs"
|
||||
- "Analyze coverage trends over time"
|
||||
- "Add a coverage gate that blocks PRs below a threshold"
|
||||
|
||||
## Instructions
|
||||
|
||||
When a user interacts with you:
|
||||
|
||||
1. **Identify the task type** from the user's request
|
||||
2. **Load the appropriate prompt** from the GitHub repository URLs listed above
|
||||
3. **Follow the loaded prompt's instructions** exactly
|
||||
4. **If uncertain**, ask clarifying questions to determine the right prompt
|
||||
|
||||
## Quick Reference
|
||||
|
||||
```bash
|
||||
# Initialize repository for agentic workflows
|
||||
gh aw init
|
||||
|
||||
# Generate the lock file for a workflow
|
||||
gh aw compile [workflow-name]
|
||||
|
||||
# Debug workflow runs
|
||||
gh aw logs [workflow-name]
|
||||
gh aw audit <run-id>
|
||||
|
||||
# Upgrade workflows
|
||||
gh aw fix --write
|
||||
gh aw compile --validate
|
||||
```
|
||||
|
||||
## Key Features of gh-aw
|
||||
|
||||
- **Natural Language Workflows**: Write workflows in markdown with YAML frontmatter
|
||||
- **AI Engine Support**: Copilot, Claude, Codex, or custom engines
|
||||
- **MCP Server Integration**: Connect to Model Context Protocol servers for tools
|
||||
- **Safe Outputs**: Structured communication between AI and GitHub API
|
||||
- **Strict Mode**: Security-first validation and sandboxing
|
||||
- **Shared Components**: Reusable workflow building blocks
|
||||
- **Repo Memory**: Persistent git-backed storage for agents
|
||||
- **Sandboxed Execution**: All workflows run in the Agent Workflow Firewall (AWF) sandbox, enabling full `bash` and `edit` tools by default
|
||||
|
||||
## Important Notes
|
||||
|
||||
- Always reference the instructions file at https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/github-agentic-workflows.md for complete documentation
|
||||
- Use the MCP tool `agentic-workflows` when running in GitHub Copilot Cloud
|
||||
- Workflows must be compiled to `.lock.yml` files before running in GitHub Actions
|
||||
- **Bash tools are enabled by default** - Don't restrict bash commands unnecessarily since workflows are sandboxed by the AWF
|
||||
- Follow security best practices: minimal permissions, explicit network access, no template injection
|
||||
- **Network configuration**: Use ecosystem identifiers (`node`, `python`, `go`, etc.) or explicit FQDNs in `network.allowed`. Bare shorthands like `npm` or `pypi` are **not** valid. See https://github.com/github/gh-aw/blob/v0.68.3/.github/aw/network.md for the full list of valid ecosystem identifiers and domain patterns.
|
||||
- **Single-file output**: When creating a workflow, produce exactly **one** workflow `.md` file. Do not create separate documentation files (architecture docs, runbooks, usage guides, etc.). If documentation is needed, add a brief `## Usage` section inside the workflow file itself.
|
||||
66
.github/aw/actions-lock.json
vendored
Normal file
66
.github/aw/actions-lock.json
vendored
Normal file
@ -0,0 +1,66 @@
|
||||
{
|
||||
"entries": {
|
||||
"github/gh-aw-actions/setup@v0.68.3": {
|
||||
"repo": "github/gh-aw-actions/setup",
|
||||
"version": "v0.68.3",
|
||||
"sha": "ba90f2186d7ad780ec640f364005fa24e797b360"
|
||||
},
|
||||
"github/gh-aw-actions/setup@v0.68.7": {
|
||||
"repo": "github/gh-aw-actions/setup",
|
||||
"version": "v0.68.7",
|
||||
"sha": "69af89ae134d818caa7743b23ad966ce03914a27"
|
||||
}
|
||||
},
|
||||
"containers": {
|
||||
"ghcr.io/github/gh-aw-firewall/agent:0.25.18": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/agent:0.25.18",
|
||||
"digest": "sha256:c77e8c26bab6c39e8568d8e2f8c17015944849a8cbcdfb4bd9725d8893725ca2",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/agent:0.25.18@sha256:c77e8c26bab6c39e8568d8e2f8c17015944849a8cbcdfb4bd9725d8893725ca2"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-firewall/agent:0.25.20": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/agent:0.25.20",
|
||||
"digest": "sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18",
|
||||
"digest": "sha256:d16a40a3ca6e989896d0cef9f31b9412bb1fcc8755bafcafb95012ae1078539b",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18@sha256:d16a40a3ca6e989896d0cef9f31b9412bb1fcc8755bafcafb95012ae1078539b"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20",
|
||||
"digest": "sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20@sha256:6971639e381e82e45134bcd333181f456df3a52cd6f818a3e3d6de068ff91519"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-firewall/squid:0.25.18": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/squid:0.25.18",
|
||||
"digest": "sha256:eb102afcfbae26ffcec016adebb74d3be7b0a5bf376ba306599cdf3effbe288e",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/squid:0.25.18@sha256:eb102afcfbae26ffcec016adebb74d3be7b0a5bf376ba306599cdf3effbe288e"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-firewall/squid:0.25.20": {
|
||||
"image": "ghcr.io/github/gh-aw-firewall/squid:0.25.20",
|
||||
"digest": "sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-firewall/squid:0.25.20@sha256:5411d903f73ee597e6a084971c2adef3eb0bd405910df3ed7bf5e3d6bd58a236"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-mcpg:v0.2.17": {
|
||||
"image": "ghcr.io/github/gh-aw-mcpg:v0.2.17",
|
||||
"digest": "sha256:a6dec6ec535a11c565d982afa2f98589805ed0598862b9ea9d3c751fc71afae8",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-mcpg:v0.2.17@sha256:a6dec6ec535a11c565d982afa2f98589805ed0598862b9ea9d3c751fc71afae8"
|
||||
},
|
||||
"ghcr.io/github/gh-aw-mcpg:v0.2.19": {
|
||||
"image": "ghcr.io/github/gh-aw-mcpg:v0.2.19",
|
||||
"digest": "sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd",
|
||||
"pinned_image": "ghcr.io/github/gh-aw-mcpg:v0.2.19@sha256:44d4d8de7e6c37aaea484eba489940c52df6a0b54078ddcbc9327592d5b3c3dd"
|
||||
},
|
||||
"ghcr.io/github/github-mcp-server:v0.32.0": {
|
||||
"image": "ghcr.io/github/github-mcp-server:v0.32.0",
|
||||
"digest": "sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28",
|
||||
"pinned_image": "ghcr.io/github/github-mcp-server:v0.32.0@sha256:2763823c63bcca718ce53850a1d7fcf2f501ec84028394f1b63ce7e9f4f9be28"
|
||||
},
|
||||
"node:lts-alpine": {
|
||||
"image": "node:lts-alpine",
|
||||
"digest": "sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f",
|
||||
"pinned_image": "node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"
|
||||
}
|
||||
}
|
||||
}
|
||||
13
.github/dependabot.yml
vendored
Normal file
13
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
|
||||
- package-ecosystem: "nuget"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
day: "monday"
|
||||
189
.github/workflows/ci.yml
vendored
189
.github/workflows/ci.yml
vendored
@ -11,15 +11,22 @@ jobs:
|
||||
test:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v4
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
restore-keys: nuget-${{ runner.os }}-
|
||||
|
||||
- name: Install GitVersion
|
||||
uses: gittools/actions/gitversion/setup@v4
|
||||
with:
|
||||
@ -32,21 +39,106 @@ jobs:
|
||||
- name: Restore dependencies
|
||||
run: dotnet restore
|
||||
|
||||
# dotnet-coverage replaces coverlet because the integration tests spawn the
|
||||
# tray exe out-of-process; coverlet only instruments the in-proc test
|
||||
# assembly. Installing once at the job level lets every test step wrap its
|
||||
# `dotnet test` invocation in `dotnet-coverage collect`.
|
||||
- name: Install dotnet-coverage
|
||||
run: dotnet tool install --global dotnet-coverage
|
||||
|
||||
- name: Build Shared Library
|
||||
run: dotnet build src/OpenClaw.Shared -c Debug
|
||||
run: dotnet build src/OpenClaw.Shared -c Debug --no-restore
|
||||
|
||||
- name: Build Tray App (WinUI)
|
||||
run: dotnet build src/OpenClaw.Tray.WinUI -c Debug -r win-x64
|
||||
|
||||
- name: Build Tests
|
||||
run: |
|
||||
dotnet build tests/OpenClaw.Shared.Tests -c Debug
|
||||
dotnet build tests/OpenClaw.Tray.Tests -c Debug
|
||||
dotnet build tests/OpenClaw.Shared.Tests -c Debug --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.Tests -c Debug -r win-x64 --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.IntegrationTests -c Debug -r win-x64 --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.UITests -c Debug -r win-x64 --no-restore
|
||||
|
||||
- name: Run Tests
|
||||
- name: Run Shared Tests
|
||||
env:
|
||||
OPENCLAW_RUN_INTEGRATION: 1
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\Shared\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Shared.Tests
|
||||
--no-build
|
||||
-c Debug
|
||||
--verbosity normal
|
||||
--results-directory TestResults\Shared
|
||||
--logger trx;LogFileName=OpenClaw.Shared.Tests.trx"
|
||||
|
||||
- name: Run Tray Tests
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\Tray\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.Tests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--results-directory TestResults\Tray
|
||||
--logger trx;LogFileName=OpenClaw.Tray.Tests.trx"
|
||||
|
||||
# Tray integration tests gate on OPENCLAW_RUN_INTEGRATION; set it so the
|
||||
# MCP-server / capability tests actually run. dotnet-coverage with no
|
||||
# filter captures coverage for both the test host AND the spawned tray
|
||||
# exe (coverlet could not — see tests/Directory.Build.props comment).
|
||||
- name: Run Tray Integration Tests
|
||||
env:
|
||||
OPENCLAW_RUN_INTEGRATION: 1
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\TrayIntegration\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.IntegrationTests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--results-directory TestResults\TrayIntegration
|
||||
--logger trx;LogFileName=OpenClaw.Tray.IntegrationTests.trx"
|
||||
|
||||
# UI tests need a real visual tree AND a system-registered WindowsAppRuntime
|
||||
# framework MSIX — the test fixture calls Bootstrap.Initialize(1.8, stable),
|
||||
# which looks up the framework package by identity. The hosted windows-2025
|
||||
# runner image doesn't preinstall it, so we install it explicitly here.
|
||||
# Version pinned to match Microsoft.WindowsAppSDK 1.8.260101001 in the csprojs.
|
||||
- name: Install WindowsAppRuntime 1.8
|
||||
shell: pwsh
|
||||
run: |
|
||||
dotnet test tests/OpenClaw.Shared.Tests --no-build -c Debug --verbosity normal
|
||||
dotnet test tests/OpenClaw.Tray.Tests --no-build -c Debug --verbosity normal
|
||||
$url = "https://aka.ms/windowsappsdk/1.8/1.8.260101001/windowsappruntimeinstall-x64.exe"
|
||||
$exe = "$env:RUNNER_TEMP\WindowsAppRuntimeInstall.exe"
|
||||
Invoke-WebRequest -Uri $url -OutFile $exe
|
||||
& $exe --quiet
|
||||
if ($LASTEXITCODE -ne 0) { throw "WindowsAppRuntimeInstall failed with exit code $LASTEXITCODE" }
|
||||
|
||||
- name: Run Tray UI Tests
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\TrayUI\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.UITests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--results-directory TestResults\TrayUI
|
||||
--logger trx;LogFileName=OpenClaw.Tray.UITests.trx"
|
||||
|
||||
- name: Upload Test Results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: test-results
|
||||
path: TestResults/
|
||||
if-no-files-found: warn
|
||||
|
||||
outputs:
|
||||
semVer: ${{ steps.gitversion.outputs.semVer }}
|
||||
@ -60,13 +152,20 @@ jobs:
|
||||
rid: [win-x64, win-arm64]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v4
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
restore-keys: nuget-${{ runner.os }}-
|
||||
|
||||
- name: Restore WinUI Tray App
|
||||
run: dotnet restore src/OpenClaw.Tray.WinUI -r ${{ matrix.rid }}
|
||||
|
||||
@ -74,11 +173,11 @@ jobs:
|
||||
run: dotnet build src/OpenClaw.Tray.WinUI --no-restore -c Release -r ${{ matrix.rid }} -p:Version=${{ needs.test.outputs.semVer }}
|
||||
|
||||
- name: Publish WinUI Tray App
|
||||
run: dotnet publish src/OpenClaw.Tray.WinUI -c Release -r ${{ matrix.rid }} --self-contained -p:Version=${{ needs.test.outputs.semVer }} -o publish
|
||||
run: dotnet publish src/OpenClaw.Tray.WinUI -c Release -r ${{ matrix.rid }} --self-contained --no-restore -p:Version=${{ needs.test.outputs.semVer }} -o publish
|
||||
|
||||
- name: Azure Login for Signing
|
||||
if: startsWith(github.ref, 'refs/tags/v') && matrix.rid != 'win-arm64'
|
||||
uses: azure/login@v2
|
||||
uses: azure/login@v3
|
||||
with:
|
||||
creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}'
|
||||
|
||||
@ -99,7 +198,7 @@ jobs:
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Upload Tray Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: openclaw-tray-${{ matrix.rid }}
|
||||
path: publish/
|
||||
@ -119,15 +218,30 @@ jobs:
|
||||
platform: ARM64
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v4
|
||||
- name: Setup .NET 10 for VS MSBuild
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
dotnet-version: 10.0.100
|
||||
|
||||
- name: Pin .NET SDK for MSIX packaging
|
||||
shell: pwsh
|
||||
run: |
|
||||
$globalJson = Get-Content global.json -Raw | ConvertFrom-Json
|
||||
$globalJson.sdk.rollForward = "disable"
|
||||
$globalJson | ConvertTo-Json -Depth 5 | Set-Content global.json
|
||||
dotnet --version
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
restore-keys: nuget-${{ runner.os }}-
|
||||
|
||||
- name: Setup MSBuild
|
||||
uses: microsoft/setup-msbuild@v2
|
||||
uses: microsoft/setup-msbuild@v3
|
||||
|
||||
- name: Restore
|
||||
run: dotnet restore src/OpenClaw.Tray.WinUI -r ${{ matrix.rid }}
|
||||
@ -171,7 +285,7 @@ jobs:
|
||||
|
||||
- name: Sign MSIX
|
||||
if: startsWith(github.ref, 'refs/tags/v') && matrix.rid != 'win-arm64'
|
||||
uses: azure/login@v2
|
||||
uses: azure/login@v3
|
||||
with:
|
||||
creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}'
|
||||
|
||||
@ -193,7 +307,7 @@ jobs:
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Upload MSIX Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: openclaw-msix-${{ matrix.rid }}
|
||||
path: ${{ steps.find-msix.outputs.msix_path }}
|
||||
@ -206,21 +320,28 @@ jobs:
|
||||
platform: [x64, arm64]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v4
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
restore-keys: nuget-${{ runner.os }}-
|
||||
|
||||
- name: Restore Command Palette Extension
|
||||
run: dotnet restore src/OpenClaw.CommandPalette
|
||||
|
||||
- name: Build Command Palette Extension
|
||||
run: dotnet build src/OpenClaw.CommandPalette -c Debug -p:Platform=${{ matrix.platform }}
|
||||
run: dotnet build src/OpenClaw.CommandPalette -c Debug -p:Platform=${{ matrix.platform }} --no-restore
|
||||
|
||||
- name: Upload Extension Artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: openclaw-commandpalette-${{ matrix.platform }}
|
||||
path: src/OpenClaw.CommandPalette/bin/${{ matrix.platform }}/Debug/
|
||||
@ -233,34 +354,34 @@ jobs:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
|
||||
- name: Download win-x64 tray artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: openclaw-tray-win-x64
|
||||
path: artifacts/tray-win-x64
|
||||
|
||||
- name: Download win-arm64 tray artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: openclaw-tray-win-arm64
|
||||
path: artifacts/tray-win-arm64
|
||||
|
||||
- name: Download win-x64 cmdpal artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: openclaw-commandpalette-x64
|
||||
path: artifacts/cmdpal-x64
|
||||
|
||||
- name: Download win-arm64 cmdpal artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
with:
|
||||
name: openclaw-commandpalette-arm64
|
||||
path: artifacts/cmdpal-arm64
|
||||
|
||||
- name: Download win-x64 MSIX artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
continue-on-error: true
|
||||
id: msix-x64
|
||||
with:
|
||||
@ -268,7 +389,7 @@ jobs:
|
||||
path: artifacts/msix-x64
|
||||
|
||||
- name: Download win-arm64 MSIX artifact
|
||||
uses: actions/download-artifact@v4
|
||||
uses: actions/download-artifact@v8
|
||||
continue-on-error: true
|
||||
id: msix-arm64
|
||||
with:
|
||||
@ -286,7 +407,7 @@ jobs:
|
||||
|
||||
# Sign ARM64 artifacts on x64 runner (ARM64 runner can't run the signing dlib)
|
||||
- name: Azure Login for ARM64 Signing
|
||||
uses: azure/login@v2
|
||||
uses: azure/login@v3
|
||||
with:
|
||||
creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}'
|
||||
|
||||
@ -359,7 +480,7 @@ jobs:
|
||||
& "C:\Program Files (x86)\Inno Setup 6\ISCC.exe" /DMyAppVersion=${{ needs.test.outputs.majorMinorPatch }} /DMyAppArch=arm64 /Dpublish=publish-arm64 installer.iss
|
||||
|
||||
- name: Azure Login for Signing
|
||||
uses: azure/login@v2
|
||||
uses: azure/login@v3
|
||||
with:
|
||||
creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}'
|
||||
|
||||
@ -379,7 +500,7 @@ jobs:
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
uses: softprops/action-gh-release@v3
|
||||
with:
|
||||
generate_release_notes: true
|
||||
files: |
|
||||
|
||||
26
.github/workflows/copilot-setup-steps.yml
vendored
Normal file
26
.github/workflows/copilot-setup-steps.yml
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
name: "Copilot Setup Steps"
|
||||
|
||||
# This workflow configures the environment for GitHub Copilot Agent with gh-aw MCP server
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
paths:
|
||||
- .github/workflows/copilot-setup-steps.yml
|
||||
|
||||
jobs:
|
||||
# The job MUST be called 'copilot-setup-steps' to be recognized by GitHub Copilot Agent
|
||||
copilot-setup-steps:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Set minimal permissions for setup steps
|
||||
# Copilot Agent receives its own token with appropriate permissions
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
- name: Install gh-aw extension
|
||||
uses: github/gh-aw-actions/setup-cli@07c7335cd76c4d4d9f00dd7874f85ff55ed71f24 # v0.71.3
|
||||
with:
|
||||
version: v0.68.1
|
||||
1914
.github/workflows/repo-assist.lock.yml
generated
vendored
1914
.github/workflows/repo-assist.lock.yml
generated
vendored
File diff suppressed because it is too large
Load Diff
35
.github/workflows/repo-assist.md
vendored
35
.github/workflows/repo-assist.md
vendored
@ -33,7 +33,24 @@ network:
|
||||
- rust
|
||||
- java
|
||||
|
||||
checkout:
|
||||
fetch: ["*"] # fetch all remote branches to allow working on PR branches
|
||||
fetch-depth: 0 # fetch full history
|
||||
|
||||
tools:
|
||||
web-fetch:
|
||||
github:
|
||||
toolsets: [all]
|
||||
min-integrity: none # This workflow is allowed to examine and comment on any issues or PRs
|
||||
bash: true
|
||||
repo-memory: true
|
||||
|
||||
safe-outputs:
|
||||
messages:
|
||||
footer: "> Generated by 🌈 {workflow_name}, see [workflow run]({run_url}). [Learn more](https://github.com/githubnext/agentics/blob/main/docs/repo-assist.md)."
|
||||
run-started: "{workflow_name} is processing {event_type}, see [workflow run]({run_url})..."
|
||||
run-success: "✓ {workflow_name} completed successfully, see [workflow run]({run_url})."
|
||||
run-failure: "✗ {workflow_name} encountered {status}, see [workflow run]({run_url})."
|
||||
add-comment:
|
||||
max: 10
|
||||
target: "*"
|
||||
@ -48,6 +65,7 @@ safe-outputs:
|
||||
target: "*"
|
||||
title-prefix: "[Repo Assist] "
|
||||
max: 4
|
||||
protected-files: fallback-to-issue
|
||||
create-issue:
|
||||
title-prefix: "[Repo Assist] "
|
||||
labels: [automation, repo-assist]
|
||||
@ -59,18 +77,11 @@ safe-outputs:
|
||||
add-labels:
|
||||
allowed: [bug, enhancement, "help wanted", "good first issue", "spam", "off topic", documentation, question, duplicate, wontfix, "needs triage", "needs investigation", "breaking change", performance, security, refactor]
|
||||
max: 30
|
||||
target: "*"
|
||||
target: "*"
|
||||
remove-labels:
|
||||
allowed: [bug, enhancement, "help wanted", "good first issue", "spam", "off topic", documentation, question, duplicate, wontfix, "needs triage", "needs investigation", "breaking change", performance, security, refactor]
|
||||
max: 5
|
||||
target: "*"
|
||||
|
||||
tools:
|
||||
web-fetch:
|
||||
github:
|
||||
toolsets: [all]
|
||||
bash: true
|
||||
repo-memory: true
|
||||
target: "*"
|
||||
|
||||
steps:
|
||||
- name: Fetch repo data for task weighting
|
||||
@ -165,7 +176,7 @@ steps:
|
||||
json.dump(result, f, indent=2)
|
||||
EOF
|
||||
|
||||
source: githubnext/agentics/workflows/repo-assist.md@cbb46ab386962aa371045839fc9998ee4e97ca64
|
||||
source: githubnext/agentics/workflows/repo-assist.md@97143ac59cb3a13ef2a77581f929f06719c7402a
|
||||
engine: copilot
|
||||
---
|
||||
|
||||
@ -313,7 +324,7 @@ Maintain a single open issue titled `[Repo Assist] Monthly Activity {YYYY}-{MM}`
|
||||
|
||||
## Suggested Actions for Maintainer
|
||||
|
||||
**Comprehensive list** of all pending actions requiring maintainer attention (excludes items already actioned and checked off).
|
||||
**Comprehensive list** of all pending actions requiring maintainer attention (excludes items already actioned and checked off).
|
||||
- Reread the issue you're updating before you update it - there may be new checkbox adjustments since your last update that require you to adjust the suggested actions.
|
||||
- List **all** the comments, PRs, and issues that need attention
|
||||
- Exclude **all** items that have either
|
||||
@ -379,4 +390,4 @@ Maintain a single open issue titled `[Repo Assist] Monthly Activity {YYYY}-{MM}`
|
||||
- **Systematic**: use the backlog cursor to process oldest issues first over successive runs. Do not stop early.
|
||||
- **Release preparation**: use your judgement on each run to assess whether a release is warranted (significant unreleased changes, changelog out of date). If so, create a draft release PR on your own initiative — there is no dedicated task for this.
|
||||
- **Quality over quantity**: noise erodes trust. Do nothing rather than add low-value output.
|
||||
- **Bias toward action**: While avoiding spam, actively seek ways to contribute value within the two selected tasks. A "no action" run should be genuinely exceptional.
|
||||
- **Bias toward action**: While avoiding spam, actively seek ways to contribute value within the two selected tasks. A "no action" run should be genuinely exceptional.
|
||||
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -344,3 +344,10 @@ MigrationBackup/
|
||||
# Fody - auto-generated XML schema
|
||||
FodyWeavers.xsd
|
||||
Output/
|
||||
*.lscache
|
||||
test_ws.py
|
||||
|
||||
# Local visual test output
|
||||
visual-test-output/
|
||||
|
||||
.squad/
|
||||
|
||||
31
.squad/decisions/inbox/aaron-actually-fix-2-3.md
Normal file
31
.squad/decisions/inbox/aaron-actually-fix-2-3.md
Normal file
@ -0,0 +1,31 @@
|
||||
# Aaron: actual fixes for PR #274 bugs 2 and 3
|
||||
|
||||
## Bug 2 — tray quick-chat broken
|
||||
|
||||
Traced tray left-click to `InitializeTrayIcon()` -> `_trayIcon.Selected += OnTrayIconSelected` -> `OnTrayIconSelected()` -> `ShowChatWindow()`. The quick-chat path did use `ShowChatWindow`, but it resolved only `settings.Token` while the working operator client resolves `settings.Token`, `settings.BootstrapToken`, then stored `DeviceIdentity.DeviceToken` via `GatewayCredentialResolver`.
|
||||
|
||||
Changes:
|
||||
- `App.ShowChatWindow()` and chat pre-warm now use the same `GatewayCredentialResolver` pattern as the operator client.
|
||||
- `ShowChatWindow()` calls `ChatWindow.RefreshCredentials()` on every tray click, including newly-created windows.
|
||||
- `ChatWindow.RefreshCredentials()` always rebuilds the URL and navigates initialized WebView2 to it; it no longer returns early when the same stale URL is cached.
|
||||
- Added diagnostic logs: `[ChatWindow] Quick-chat credentials resolved from ...` and `[ChatWindow] Refreshing to ...`.
|
||||
- Applied Mattingly Bug 4 handoff: bootstrap injection now runs from `ChatWindow` after successful WebView navigation.
|
||||
|
||||
Manual validation for Mike: click tray icon; tail `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log` and look for `[ChatWindow] Refreshing to ...`, then verify chat loads without login loop.
|
||||
|
||||
## Bug 3 — pairing toast notification storm
|
||||
|
||||
Searched toast paths and traced pairing notifications through `WindowsNodeClient` direct `PairingStatusChanged` emitters (`pairing.requested`, `pairing.resolved`, `NOT_PAIRED`, and `hello-ok`) plus tray toasts in `App.OnPairingStatusChanged()` and `App.OnNodeStatusChanged()`.
|
||||
|
||||
Changes:
|
||||
- Routed all `WindowsNodeClient` pairing emitters through `EmitPairingStatusOnTransition()`; duplicates now log `[NODE] Suppressing duplicate pairing status event: ...`.
|
||||
- Added a toast-boundary 30-second dedupe in `App.ShowToast(builder, toastTag, deviceId)`, keyed by `(toastTag, deviceId)`.
|
||||
- Tagged node pairing pending/paired/rejected and node-connected toasts.
|
||||
- Suppressed the node-connected toast if a node-paired toast was just shown for the same device.
|
||||
- Added diagnostic logs: `[ToastDeduper] Showing toast tag=... deviceId=...` and `[ToastDeduper] Suppressed duplicate toast tag=... deviceId=...`.
|
||||
|
||||
Manual validation for Mike: complete pairing; expect exactly one node-paired toast and log line `[ToastDeduper] Showing toast tag=node-paired deviceId=...`; duplicates should log suppression.
|
||||
|
||||
## Validation
|
||||
|
||||
Ran `./build.ps1`: passed. Per fast-loop directive, skipped `dotnet test`.
|
||||
45
.squad/decisions/inbox/mattingly-actually-fix-1-4-5.md
Normal file
45
.squad/decisions/inbox/mattingly-actually-fix-1-4-5.md
Normal file
@ -0,0 +1,45 @@
|
||||
# Mattingly: actual fixes for PR #274 bugs 1, 4, 5
|
||||
|
||||
## Bug 1 — chat window auto-launch on Finish
|
||||
|
||||
Changed `OnboardingWindow.OnWizardComplete()` to ignore `WizardLifecycleState == "complete"`. The signal now is: the window is completing from `OnboardingRoute.Ready` and `StartupSetupState.RequiresSetup(settings, identityDataPath)` is false. That is the path the Finish button actually takes: `Ready` page Finish -> `OnboardingState.Complete()` -> `OnOnboardingFinished()` -> `OnWizardComplete()`.
|
||||
|
||||
Log to validate: `[OnboardingWindow] OnWizardComplete launching chat`.
|
||||
|
||||
## Bug 4 — BOOTSTRAP.md kickoff injection
|
||||
|
||||
Hardened `BootstrapMessageInjector`:
|
||||
|
||||
- Traverses shadow DOM for Lit UI controls.
|
||||
- Probes and logs visible control count: `[OpenClaw] Bootstrap probe controls=N`.
|
||||
- Supports `textarea`, text inputs, contenteditable, and role=textbox.
|
||||
- Uses native value setters so controlled inputs see the value.
|
||||
- Clicks Send/form-submit/Enter fallbacks.
|
||||
- Does **not** burn `HasInjectedFirstRunBootstrap` when the script returns `no-input`; the gate is only persisted on `sent`.
|
||||
|
||||
Aaron still needs to move the call site to after successful chat navigation, because the current `App.ShowChatWindow()` can see `TryGetScriptExecutor()==null` while the WebView2 is still initializing.
|
||||
|
||||
Exact handoff line for Aaron in `ChatWindow.xaml.cs` NavigationCompleted success branch after `RequestChatInputFocus();`:
|
||||
|
||||
```csharp
|
||||
OpenClawTray.Services.BootstrapMessageInjector.ScriptExecutor exec = script => WebView.CoreWebView2.ExecuteScriptAsync(script).AsTask();
|
||||
_ = OpenClawTray.Services.BootstrapMessageInjector.InjectAsync(exec, ((App)Microsoft.UI.Xaml.Application.Current).Settings, initialDelayMs: 500);
|
||||
```
|
||||
|
||||
If `App.Settings` is not exposed, add an internal property returning `_settings`, or route the existing `_settings` from `App.ShowChatWindow()` into a ChatWindow method. The important point is that the call must happen inside `NavigationCompleted` when `e.IsSuccess` is true.
|
||||
|
||||
## Bug 5 — autostart default/toggle
|
||||
|
||||
Changed `ReadyPage` to render the toggle ON as a safety default, then sync to `Settings.AutoStart` on mount and immediately call `AutoStartManager.SetAutoStart()` so a user who never toggles still gets the Run-key. The toggle handler still persists settings and updates the Run-key immediately.
|
||||
|
||||
Changed `AutoStartManager.SetAutoStart()` to use `Registry.CurrentUser.CreateSubKey(...)` instead of `OpenSubKey(...)`, so it can create the Run key/value when missing instead of silently returning.
|
||||
|
||||
Manual registry validation:
|
||||
|
||||
```powershell
|
||||
Get-ItemProperty 'HKCU:\Software\Microsoft\Windows\CurrentVersion\Run' -Name OpenClawTray -ErrorAction SilentlyContinue
|
||||
```
|
||||
|
||||
## Validation
|
||||
|
||||
Ran `./build.ps1`: passed. Per fast-loop directive, skipped `dotnet test`.
|
||||
58
.squad/decisions/inbox/mattingly-finish-actually-hub.md
Normal file
58
.squad/decisions/inbox/mattingly-finish-actually-hub.md
Normal file
@ -0,0 +1,58 @@
|
||||
# Mattingly — PR #274 finish should open Hub chat
|
||||
|
||||
## Audit
|
||||
|
||||
Command requested: `grep -rn "launching chat\|ShowChatWindow\|ShowHub\|OnWizardComplete" src/OpenClaw.Tray.WinUI` (run as an equivalent ripgrep search; `rg` was not on PATH in PowerShell, so the Copilot rg tool was used against the same tree).
|
||||
|
||||
HEAD before this fix: `8c68111 Launch hub chat after onboarding`.
|
||||
|
||||
Matches found:
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:498` — tray icon click calls `ShowChatWindow()`.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:501` — `ShowChatWindow()` method.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:542` — `ShowChatWindow` deferred-show warning string.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:644` — tray menu `openchat` calls `ShowChatWindow()`.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:562,581,647,652,654,710,1043,1855,2809,2928,3048,3101,3603,4265` — `ShowHub(...)` method/call sites.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:587` — Finish event calls `OnWizardComplete()`.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:596` — X/Closed path calls `OnWizardComplete()`.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:620` — single `OnWizardComplete()` implementation.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:649` — required diagnostic log line.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:650,658,660,667,671,675,679` — deferred Hub chat launch helper.
|
||||
- Documentation/comment-only references in `ChatWindow.xaml.cs`, `HubWindow.xaml.cs`, `VoiceOverlayWindow.xaml.cs`, and `OnboardingState.cs`.
|
||||
|
||||
The literal old string `launching chat` has no remaining source match in this worktree.
|
||||
|
||||
## Diagnosis
|
||||
|
||||
The log Mike captured (`[OnboardingWindow] OnWizardComplete launching chat`) corresponds to the pre-`8c68111` body of `OnboardingWindow.OnWizardComplete` in `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs`, the only wizard-completion implementation. In the current clean worktree, `8c68111` did change that exact method to log `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab` and call `App.ShowHub("chat")`.
|
||||
|
||||
I did not find a second `OnWizardComplete`, overload, post-finish hook, or hidden `ShowHub` fallback to `ChatWindow`. `App.ShowHub(...)` creates a `HubWindow` when `_hubWindow` is null/closed, sets state, navigates, and activates it. The remaining `ShowChatWindow()` calls are tray quick-chat entry points, not wizard finish paths.
|
||||
|
||||
The prior fix therefore did not take in the live run because that run was not executing source/binaries containing `8c68111` (or was launched from another stale build/worktree). To make the wizard finish path more robust and easier to verify, this follow-up keeps the exact required log line and dispatches `ShowHub("chat")` at low priority after the wizard close event settles, so the Hub opens after the wizard finishes closing and cannot lose an ordering fight to wizard teardown.
|
||||
|
||||
## Changes
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs`
|
||||
- Keeps the required log line: `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`.
|
||||
- Replaces the inline post-finish call with `ShowHubChatAfterWizardClose()`.
|
||||
- The helper dispatches `App.ShowHub("chat")` on the UI dispatcher at low priority, with a direct fallback if enqueue fails.
|
||||
- Adds an explicit warning if `Application.Current` is not the tray `App`.
|
||||
- Updates stale bootstrap comment from `App.ShowChatWindow()` to HubWindow chat navigation.
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Services/OnboardingState.cs`
|
||||
- Updates stale route comment to say the Ready path launches the Hub chat tab, not the old chat window.
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Services/BootstrapMessageInjector.cs`
|
||||
- Updates stale comment to describe HubWindow chat page injection instead of post-wizard `App.ShowChatWindow()`.
|
||||
|
||||
## Validation
|
||||
|
||||
- `git pull --rebase fork feat/wsl-gateway-clean` before commit: already up to date.
|
||||
- `./build.ps1`: passed.
|
||||
- Tests intentionally not run per active directive: NO tests, incremental `./build.ps1` only.
|
||||
|
||||
## Verification log line
|
||||
|
||||
Mike should verify this exact line on the next finish run:
|
||||
|
||||
`[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`
|
||||
21
.squad/decisions/inbox/mattingly-finish-launch-hubwindow.md
Normal file
21
.squad/decisions/inbox/mattingly-finish-launch-hubwindow.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Mattingly: Finish opens HubWindow chat
|
||||
|
||||
## Summary
|
||||
Onboarding completion from Ready now launches the full HubWindow directly on the Chat tab instead of the standalone quick-chat ChatWindow.
|
||||
|
||||
## Changes
|
||||
- `src\OpenClaw.Tray.WinUI\App.xaml.cs`
|
||||
- Made `ShowHub(string? navigateTo = null, bool activate = true)` internal so onboarding can reuse the existing hub-opening path.
|
||||
- `src\OpenClaw.Tray.WinUI\Onboarding\OnboardingWindow.cs`
|
||||
- Replaced `ShowChatWindow()` completion launch with `ShowHub("chat")`.
|
||||
- Added diagnostic log: `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`.
|
||||
- `src\OpenClaw.Tray.WinUI\Pages\ChatPage.xaml.cs`
|
||||
- Wired `BootstrapMessageInjector.InjectAsync` into the Hub chat WebView2 `NavigationCompleted` success path, matching the standalone `ChatWindow` gated injection behavior.
|
||||
|
||||
## Validation
|
||||
- Ran `./build.ps1` successfully after the code change.
|
||||
- Per active session directive, did not run tests after the fix.
|
||||
|
||||
## Architectural notes
|
||||
- Hub already exposes tag-based navigation through `NavigateTo("chat")`; `ShowHub("chat")` selects the existing NavigationView item and navigates to `ChatPage`.
|
||||
- Bootstrap injection remains wired in both standalone `ChatWindow` and Hub `ChatPage`; the existing global `Settings.HasInjectedFirstRunBootstrap` gate ensures only one path injects.
|
||||
29
AGENTS.md
Normal file
29
AGENTS.md
Normal file
@ -0,0 +1,29 @@
|
||||
# AGENTS.md
|
||||
|
||||
## Required Validation After Every Change
|
||||
|
||||
All agents working in this repository must run validation after each code change before marking work complete.
|
||||
|
||||
Required steps:
|
||||
|
||||
1. Run full repo build:
|
||||
- `./build.ps1`
|
||||
2. Run shared tests:
|
||||
- `dotnet test ./tests/OpenClaw.Shared.Tests/OpenClaw.Shared.Tests.csproj --no-restore`
|
||||
3. Run tray tests:
|
||||
- `dotnet test ./tests/OpenClaw.Tray.Tests/OpenClaw.Tray.Tests.csproj --no-restore`
|
||||
|
||||
If a command fails:
|
||||
|
||||
1. Fix the issue.
|
||||
2. Re-run the failed command.
|
||||
3. Re-run all required validation commands before completion.
|
||||
|
||||
Notes:
|
||||
|
||||
- If a build/test is blocked by an environmental lock (for example, a running executable locking output assemblies), stop/close the locking process and rerun.
|
||||
- In linked git worktrees, set `OPENCLAW_REPO_ROOT` to the worktree path before running tests that discover the repository root, for example:
|
||||
- `$env:OPENCLAW_REPO_ROOT='D:\github\moltbot-windows-hub.<worktree-name>'`
|
||||
- Tray tests must isolate `SettingsManager` from real user settings. Do not use `new SettingsManager()` in tests unless the test intentionally reads `%APPDATA%\OpenClawTray\settings.json`; pass a temp settings directory or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts.
|
||||
- Prefer isolated worktrees for PR validation. Use `git-wt` for worktree workflows; `wt.exe` may resolve to WorkTrunk instead of Windows Terminal, so use the full Windows Terminal path when explicitly launching Terminal.
|
||||
- Do not claim completion without reporting validation results.
|
||||
@ -67,13 +67,11 @@ openclaw-windows-hub/
|
||||
├── .github/workflows/
|
||||
│ └── ci.yml # GitHub Actions CI/CD workflow
|
||||
│
|
||||
├── moltbot-windows-hub.slnx # Solution file (historical name)
|
||||
├── openclaw-windows-node.slnx # Solution file
|
||||
├── README.md # User-facing documentation
|
||||
└── DEVELOPMENT.md # This file
|
||||
```
|
||||
|
||||
> **Note on Naming:** The solution file is named `moltbot-windows-hub.slnx` due to the project's history (formerly known as Moltbot, formerly known as Clawdbot). The repository and current branding use "OpenClaw".
|
||||
|
||||
### Project Dependencies
|
||||
|
||||
```
|
||||
@ -89,7 +87,7 @@ OpenClaw.Tray.Tests ──tests──▶ OpenClaw.Shared
|
||||
|-----------|----------|---------|
|
||||
| **Gateway Communication** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | WebSocket client with protocol v3, reconnect/backoff logic |
|
||||
| **Notification System** | `OpenClaw.Tray.WinUI/App.xaml.cs` | Event routing, toast notifications, classification |
|
||||
| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/WebChatWindow.xaml.cs` | Embedded chat panel with lifecycle management |
|
||||
| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/ChatWindow.xaml.cs` | Embedded chat panel with lifecycle management |
|
||||
| **Tray Icon Management** | `OpenClaw.Tray.WinUI/Helpers/IconHelper.cs` | GDI handle management, dynamic icon generation |
|
||||
| **Session Tracking** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | Session state, activity tracking, polling |
|
||||
| **Settings & Logging** | `OpenClaw.Tray.WinUI/Services/` | JSON settings persistence, file rotation logging |
|
||||
@ -168,7 +166,7 @@ dotnet build -p:EnableWindowsTargeting=true
|
||||
|
||||
#### Visual Studio
|
||||
|
||||
1. Open `moltbot-windows-hub.slnx` in Visual Studio 2022
|
||||
1. Open `openclaw-windows-node.slnx` in Visual Studio 2022
|
||||
2. Set `OpenClaw.Tray.WinUI` as the startup project
|
||||
3. Press F5 to run with debugging
|
||||
|
||||
@ -287,7 +285,7 @@ Notifications are classified using two strategies:
|
||||
|
||||
### WebView2 Lifecycle
|
||||
|
||||
The `WebChatWindow` uses Microsoft Edge WebView2 for embedded web content:
|
||||
The `ChatWindow` uses Microsoft Edge WebView2 for embedded web content:
|
||||
|
||||
**Initialization:**
|
||||
1. WebView2 control created in XAML
|
||||
@ -301,7 +299,7 @@ Window Created → WebView2.EnsureCoreWebView2Async() → Navigate to Chat URL
|
||||
```
|
||||
|
||||
**Key Design Decisions:**
|
||||
- **Singleton pattern**: Only one WebChat window instance exists
|
||||
- **Singleton pattern**: Only one chat window instance exists
|
||||
- **Hidden instead of disposed**: Window is hidden when closed to preserve state
|
||||
- **Separate user data folder**: Isolates cookies/storage from browser
|
||||
- **Navigation guard**: Prevents accidental navigation away from chat
|
||||
@ -427,8 +425,8 @@ dotnet test --filter "FullyQualifiedName~AgentActivityTests"
|
||||
```
|
||||
|
||||
**Test Coverage:**
|
||||
- ✅ **478 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting
|
||||
- ✅ **93 tests** in `OpenClaw.Tray.Tests` — menu display, menu positioning, settings round-trip, deep link parsing
|
||||
- ✅ **1182 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting, MCP, device identity, and WinNode client coverage
|
||||
- ✅ **388 tests** in `OpenClaw.Tray.Tests` — settings round-trip, deep link parsing, onboarding state, setup code decoder, gateway health/chat helpers, security validation, wizard step parsing, gateway discovery, localization validation
|
||||
- ✅ All tests are pure unit tests (no network, no file system, no external dependencies)
|
||||
|
||||
See [tests/OpenClaw.Shared.Tests/README.md](tests/OpenClaw.Shared.Tests/README.md) for detailed test documentation.
|
||||
@ -443,7 +441,7 @@ You can test the UI and basic functionality without a running gateway:
|
||||
3. Enter a dummy gateway URL (e.g., `ws://localhost:18789`)
|
||||
4. The app will show "Disconnected" status but you can:
|
||||
- Test the tray menu structure
|
||||
- Open Settings dialog and configure preferences
|
||||
- Open the Settings page and configure preferences
|
||||
- Test auto-start functionality
|
||||
- View logs
|
||||
|
||||
@ -489,8 +487,8 @@ You can test the UI and basic functionality without a running gateway:
|
||||
- Verify Windows toast notification appears (if enabled)
|
||||
- Click toast → should open relevant UI
|
||||
|
||||
2. **Notification History**:
|
||||
- Right-click tray → **Notification History**
|
||||
2. **Activity / notification history**:
|
||||
- Right-click tray → **Activity Stream** or **Notification History**
|
||||
- Verify past notifications are listed
|
||||
- Test filtering by category
|
||||
|
||||
@ -749,6 +747,51 @@ gh run download <run-id> --repo shanselman/openclaw-windows-hub
|
||||
- **Discussions**: [GitHub Discussions](https://github.com/shanselman/openclaw-windows-hub/discussions)
|
||||
- **Documentation**: [OpenClaw Docs](https://docs.molt.bot)
|
||||
|
||||
## Developing & Testing the Onboarding Wizard
|
||||
|
||||
The onboarding wizard is a 6-screen flow built with OpenClaw's minimal FunctionalUI helper layer for declarative C# WinUI. The chat page uses a WebView2 overlay for visual consistency with the post-setup chat experience.
|
||||
|
||||
### Building
|
||||
|
||||
The WinUI project requires platform-specific build targets. Use the build script:
|
||||
|
||||
```powershell
|
||||
./build.ps1 -Project WinUI # Builds with correct -r win-x64 targets
|
||||
```
|
||||
|
||||
Running `dotnet build` directly without a runtime identifier (e.g. `-r win-x64`) fails with "WindowsAppSDKSelfContained requires a supported Windows architecture"; the build script passes the correct one for you.
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `OPENCLAW_FORCE_ONBOARDING=1` | Show onboarding wizard even if a token already exists |
|
||||
| `OPENCLAW_SKIP_UPDATE_CHECK=1` | Skip the update dialog (useful during testing) |
|
||||
| `OPENCLAW_LANGUAGE=fr-fr` | Override UI language (validated: en-us, fr-fr, nl-nl, zh-cn, zh-tw) |
|
||||
| `OPENCLAW_GATEWAY_PORT=19001` | Override default gateway port for local dev |
|
||||
| `OPENCLAW_VISUAL_TEST=1` | Enable automatic screenshot capture on page transitions |
|
||||
| `OPENCLAW_VISUAL_TEST_DIR=path` | Output directory for visual test screenshots |
|
||||
|
||||
### Testing the Wizard Locally
|
||||
|
||||
1. Start a local gateway (e.g., in WSL): `cd ~/openclaw && npx openclaw gateway`
|
||||
2. Set env vars:
|
||||
```powershell
|
||||
$env:OPENCLAW_FORCE_ONBOARDING = "1"
|
||||
$env:OPENCLAW_SKIP_UPDATE_CHECK = "1"
|
||||
```
|
||||
3. Build and run: `./build.ps1 -Project WinUI` then launch the exe
|
||||
4. Navigate through all 6 screens to verify
|
||||
|
||||
### Architecture
|
||||
|
||||
- **FunctionalUI**: `src/OpenClawTray.FunctionalUI/` — Minimal declarative WinUI helper layer used by onboarding
|
||||
- **Pages**: `src/OpenClaw.Tray.WinUI/Onboarding/Pages/` — Functional UI components for each wizard screen
|
||||
- **Services**: `src/OpenClaw.Tray.WinUI/Onboarding/Services/` — State management, setup code decoder, permission checker, health check, input validation
|
||||
- **Widgets**: `src/OpenClaw.Tray.WinUI/Onboarding/Widgets/` — Shared UI components (cards, step indicators, feature rows)
|
||||
- **Window**: `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs` — Host window with WebView2 overlay for chat
|
||||
- **Helpers**: `src/OpenClaw.Tray.WinUI/Helpers/GatewayChatHelper.cs` — Shared WebView2 chat URL builder
|
||||
|
||||
---
|
||||
|
||||
*Made with 🦞 love by Scott Hanselman and the OpenClaw community*
|
||||
|
||||
12
NuGet.Config
Normal file
12
NuGet.Config
Normal file
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<packageSources>
|
||||
<clear />
|
||||
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" protocolVersion="3" />
|
||||
</packageSources>
|
||||
<packageSourceMapping>
|
||||
<packageSource key="nuget.org">
|
||||
<package pattern="*" />
|
||||
</packageSource>
|
||||
</packageSourceMapping>
|
||||
</configuration>
|
||||
188
README.md
188
README.md
@ -10,22 +10,25 @@ A Windows companion suite for [OpenClaw](https://openclaw.ai) - the AI-powered p
|
||||
|
||||
## Projects
|
||||
|
||||
This monorepo contains three projects:
|
||||
This monorepo contains four projects:
|
||||
|
||||
| Project | Description |
|
||||
|---------|-------------|
|
||||
| **OpenClaw.Tray.WinUI** | System tray application (WinUI 3) for quick access to OpenClaw |
|
||||
| **OpenClaw.Shared** | Shared gateway client library |
|
||||
| **OpenClaw.Cli** | CLI validator for WebSocket connect/send/probe using tray settings |
|
||||
| **OpenClaw.CommandPalette** | PowerToys Command Palette extension |
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
> **End-user installer?** See [docs/SETUP.md](docs/SETUP.md) for a step-by-step installation guide (no build required).
|
||||
|
||||
### Prerequisites
|
||||
- Windows 10 (20H2+) or Windows 11
|
||||
- .NET 10.0 SDK - https://dotnet.microsoft.com/download/dotnet/10.0
|
||||
- Windows 10 SDK (for WinUI build) - install via Visual Studio or standalone
|
||||
- WebView2 Runtime - pre-installed on modern Windows, or get from https://developer.microsoft.com/microsoft-edge/webview2
|
||||
- PowerToys (optional, for Command Palette extension)
|
||||
- PowerToys (optional, for Command Palette extension) — see [docs/POWERTOYS.md](docs/POWERTOYS.md)
|
||||
|
||||
### Build
|
||||
|
||||
@ -65,6 +68,24 @@ dotnet build src/OpenClaw.Tray.WinUI -r win-x64 -p:PackageMsix=true # x64 MSI
|
||||
.\src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\win-x64\OpenClaw.Tray.WinUI.exe # x64
|
||||
```
|
||||
|
||||
### Run CLI WebSocket Validator
|
||||
|
||||
Use the CLI to validate gateway connectivity and `chat.send` outside the tray UI.
|
||||
|
||||
```powershell
|
||||
# Show help
|
||||
dotnet run --project src/OpenClaw.Cli -- --help
|
||||
|
||||
# Use tray settings from %APPDATA%\OpenClawTray\settings.json and send one message
|
||||
dotnet run --project src/OpenClaw.Cli -- --message "quick send validation"
|
||||
|
||||
# Loop sends and also probe sessions/usage/nodes APIs
|
||||
dotnet run --project src/OpenClaw.Cli -- --repeat 5 --delay-ms 1000 --probe-read --verbose
|
||||
|
||||
# Override gateway URL/token for isolated testing
|
||||
dotnet run --project src/OpenClaw.Cli -- --url ws://127.0.0.1:18789 --token "<token>" --message "override test"
|
||||
```
|
||||
|
||||
## 📦 OpenClaw.Tray (Molty)
|
||||
|
||||
Modern Windows 11-style system tray companion that connects to your local OpenClaw gateway.
|
||||
@ -76,24 +97,41 @@ Modern Windows 11-style system tray companion that connects to your local OpenCl
|
||||
- 🔄 **Auto-updates** - Automatic updates from GitHub Releases
|
||||
- 🌐 **Web Chat** - Embedded chat window with WebView2
|
||||
- 📊 **Live Status** - Real-time sessions, channels, and usage display
|
||||
- ⚡ **Activity Stream** - Dedicated flyout for live session, usage, node, and notification events
|
||||
- 🧭 **Command Center** - Dense gateway, channel, usage, node, pairing, and allowlist diagnostics from one window
|
||||
- ⚡ **Activity Stream** - Command Center page for live session, usage, node, and notification events
|
||||
- 🔔 **Toast Notifications** - Clickable Windows notifications with [smart categorization](docs/NOTIFICATION_CATEGORIZATION.md)
|
||||
- 📡 **Channel Control** - Start/stop Telegram & WhatsApp from the menu
|
||||
- 🖥️ **Node Observability** - Node inventory with online/offline state and copyable summary
|
||||
- ⏱ **Cron Jobs** - Quick access to scheduled tasks
|
||||
- 🚀 **Auto-start** - Launch with Windows
|
||||
- ⚙️ **Settings** - Full configuration dialog
|
||||
- 🎯 **First-run experience** - Welcome dialog guides new users
|
||||
- ⚙️ **Settings** - Full configuration page
|
||||
- 🎯 **First-run onboarding** - 6-screen setup wizard (connection, permissions, chat, configuration)
|
||||
|
||||
#### Quick Send scope requirement
|
||||
|
||||
Quick Send uses the gateway `chat.send` method and requires the operator device to have `operator.write` scope.
|
||||
|
||||
If Quick Send fails with `missing scope: operator.write`, Molty now copies identity + remediation guidance to your clipboard, including:
|
||||
|
||||
- operator role and `client.id` used by the tray app
|
||||
- gateway-reported operator device id (if provided)
|
||||
- currently granted scopes (if provided)
|
||||
|
||||
For this specific error (`missing scope: operator.write`), the cause is an **operator token scope issue**. Update the token used by the tray app so it includes `operator.write`, then retry Quick Send.
|
||||
|
||||
If Quick Send fails with `pairing required` / `NOT_PAIRED`, that is a **device approval** issue. Approve the tray device in gateway pairing approvals, reconnect, and retry.
|
||||
|
||||
### Menu Sections
|
||||
- **Status** - Gateway connection status with click-to-view details
|
||||
- **Command Center** - Hub with diagnostics, channel health, usage, sessions, nodes, and copyable repair commands
|
||||
- **Sessions** - Active agent sessions with preview and per-session controls
|
||||
- **Usage** - Provider/cost summary with quick jump to activity details
|
||||
- **Channels** - Telegram/WhatsApp status with toggle control
|
||||
- **Nodes** - Online/offline node inventory and copyable summary
|
||||
- **Recent Activity** - Timestamped event stream for sessions, usage, nodes, and notifications
|
||||
- **Actions** - Dashboard, Web Chat, Quick Send, Activity Stream, History
|
||||
- **Settings** - Configuration, auto-start, logs
|
||||
- **Support & Debug** - Logs, config, diagnostics folder, redacted support context, browser setup, port/capability/node/channel/activity summaries, and managed SSH tunnel restart
|
||||
- **Settings** - Configuration and auto-start
|
||||
|
||||
### Mac Parity Status
|
||||
|
||||
@ -103,11 +141,11 @@ Comparing against [openclaw-menubar](https://github.com/magimetal/openclaw-menub
|
||||
|---------|-----|---------|-------|
|
||||
| Menu bar/tray icon | ✅ | ✅ | Color-coded status |
|
||||
| Gateway status display | ✅ | ✅ | Connected/Disconnected |
|
||||
| PID display | ✅ | ❌ | Mac shows gateway PID |
|
||||
| PID display | ✅ | ✅ | Command Center shows gateway listener process/PID |
|
||||
| Channel status | ✅ | ✅ | Mac: Discord / Win: Telegram+WhatsApp |
|
||||
| Sessions count | ✅ | ✅ | |
|
||||
| Last check timestamp | ✅ | ✅ | Shown in tray tooltip |
|
||||
| Gateway start/stop/restart | ✅ | ❌ | Mac controls gateway process |
|
||||
| Gateway start/stop/restart | ✅ | ⚠️ | Windows can restart the managed SSH tunnel from tray Support & Debug and Command Center; external gateway process control is not implemented |
|
||||
| View Logs | ✅ | ✅ | |
|
||||
| Open Web UI | ✅ | ✅ | |
|
||||
| Refresh | ✅ | ✅ | Auto-refresh on menu open |
|
||||
@ -126,7 +164,7 @@ These features are available in Windows but not in the Mac app:
|
||||
| Channel control | Start/stop Telegram & WhatsApp |
|
||||
| Modern flyout menu | Windows 11-style with dark/light mode |
|
||||
| Deep links | `openclaw://` URL scheme with IPC |
|
||||
| First-run welcome | Guided onboarding for new users |
|
||||
| First-run onboarding | 6-screen guided setup wizard (Welcome → Connection → Wizard → Permissions → Chat → Ready) |
|
||||
| PowerToys integration | Command Palette extension |
|
||||
|
||||
### 🔌 Node Mode (Agent Control)
|
||||
@ -136,9 +174,15 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
| Capability | Commands | Description |
|
||||
|------------|----------|-------------|
|
||||
| **System** | `system.notify`, `system.run`, `system.run.prepare`, `system.which`, `system.execApprovals.get`, `system.execApprovals.set` | Show Windows toast notifications, execute commands with policy controls |
|
||||
| **Canvas** | `canvas.present`, `canvas.hide`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.push`, `canvas.a2ui.reset` | Display and control a WebView2 window |
|
||||
| **Screen** | `screen.capture`, `screen.list` | Capture screenshots |
|
||||
| **Camera** | `camera.list`, `camera.snap` | Enumerate cameras and capture a still photo |
|
||||
| **Canvas** | `canvas.present`, `canvas.hide`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.push`, `canvas.a2ui.pushJSONL`, `canvas.a2ui.reset` | Display and control a WebView2 window |
|
||||
| **Screen** | `screen.snapshot`, `screen.record` | Capture screenshots and fixed-duration MP4 screen recordings |
|
||||
| **Camera** | `camera.list`, `camera.snap`, `camera.clip` | Enumerate cameras and capture still photos or short video clips |
|
||||
| **Speech-to-text** | `stt.transcribe` | Capture audio from the default microphone for a bounded duration and return transcribed text. Default-off; opt-in via Settings. When enabled, advertised to both gateway callers (subject to gateway allowlist) and local MCP clients (subject to bearer token). |
|
||||
| **Location** | `location.get` | Return Windows geolocation when permission is available |
|
||||
| **Device** | `device.info`, `device.status` | Return Windows host/app metadata and lightweight status |
|
||||
| **Text-to-speech** | `tts.speak` | Speak text aloud through Windows speech synthesis, or ElevenLabs when configured |
|
||||
|
||||
Packaged installs declare camera, microphone, and location capabilities. Windows may ask for consent the first time a node capability uses one of those protected resources.
|
||||
|
||||
#### Node Setup
|
||||
|
||||
@ -167,17 +211,22 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
"canvas.eval",
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
"screen.capture",
|
||||
"screen.list",
|
||||
"screen.snapshot",
|
||||
"camera.list",
|
||||
"camera.snap"
|
||||
"camera.snap",
|
||||
"camera.clip",
|
||||
"location.get",
|
||||
"device.info",
|
||||
"device.status",
|
||||
"tts.speak"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
> ⚠️ **Important**: The gateway has a server-side allowlist. Commands must be listed explicitly - wildcards like `canvas.*` don't work!
|
||||
> ⚠️ **Important**: The gateway has a server-side allowlist. Commands must be listed explicitly - wildcards like `canvas.*` don't work! Privacy-sensitive commands such as `screen.record` and agent-driven audio playback via `tts.speak` should only be added to `allowCommands` when you explicitly want to allow them.
|
||||
|
||||
5. **Test it** from your Mac/gateway:
|
||||
```bash
|
||||
@ -191,10 +240,13 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
openclaw nodes canvas eval --node <id> --javaScript "document.title"
|
||||
|
||||
# Render A2UI JSONL in the canvas (pass the file contents as a string)
|
||||
openclaw nodes canvas a2ui push --node <id> --jsonl "$(Get-Content -Raw .\\ui.jsonl)"
|
||||
openclaw nodes canvas a2ui push --node <id> --jsonl "$(cat ./ui.jsonl)"
|
||||
|
||||
# Take a screenshot
|
||||
openclaw nodes invoke --node <id> --command screen.capture --params '{"screenIndex":0,"format":"png"}'
|
||||
openclaw nodes invoke --node <id> --command screen.snapshot --params '{"screenIndex":0,"format":"png"}'
|
||||
|
||||
# Record a short screen clip (requires explicitly allowing screen.record on the gateway)
|
||||
openclaw nodes screen record --node <id> --duration 3000 --fps 10 --screen 0 --no-audio --out /tmp/openclaw-windows-screen-record-test.mp4 --json
|
||||
|
||||
# List cameras
|
||||
openclaw nodes invoke --node <id> --command camera.list
|
||||
@ -202,6 +254,9 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
# Take a photo (NV12/MediaCapture fallback)
|
||||
openclaw nodes invoke --node <id> --command camera.snap --params '{"deviceId":"<device-id>","format":"jpeg","quality":80}'
|
||||
|
||||
# Speak text aloud on the Windows node (requires TTS enabled in Settings and tts.speak allowed on the gateway)
|
||||
openclaw nodes invoke --node <id> --command tts.speak --params '{"text":"Hello from OpenClaw","provider":"windows"}'
|
||||
|
||||
# Execute a command on the Windows node
|
||||
openclaw nodes invoke --node <id> --command system.run --params '{"command":"Get-Process | Select -First 5","shell":"powershell","timeoutMs":10000}'
|
||||
|
||||
@ -215,7 +270,17 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
|
||||
> 🔒 **Exec Policy**: `system.run` is gated by an approval policy on the Windows node at `%LOCALAPPDATA%\OpenClawTray\exec-policy.json` (schema: `{ "defaultAction": "...", "rules": [...] }`). This is separate from gateway-side `~/.openclaw/exec-approvals.json`.
|
||||
>
|
||||
> Rules are matched against the `command` token (`argv[0]`). If your call runs `powershell.exe -File script.ps1`, allow `powershell.exe`/`pwsh.exe` (not just the script path), or you'll get `No matching rule; default policy applied`.
|
||||
> Rules are matched against the full command line. Known wrapper payloads such as `cmd /c ...`, `powershell -Command ...`, `pwsh -EncodedCommand ...`, and `bash -c ...` are also evaluated before execution. Dangerous environment overrides like `PATH`, `PATHEXT`, `NODE_OPTIONS`, `GIT_SSH_COMMAND`, `LD_*`, and `DYLD_*` are rejected.
|
||||
|
||||
#### Command Center diagnostics
|
||||
|
||||
Open the status detail/Command Center from the tray menu or with `openclaw://commandcenter`. It shows:
|
||||
|
||||
- channel health from gateway `health` events, including node-mode health received without a separate operator connection
|
||||
- active sessions, usage/cost data, node inventory, declared commands, and Mac parity notes
|
||||
- allowlist diagnostics that separate safe companion commands from privacy-sensitive opt-ins like `screen.record`, `camera.snap`, and `camera.clip`
|
||||
- copyable repair commands for safe allowlist fixes and pending pairing approval
|
||||
- recent activity and node invoke results through the Activity Stream, storing command names/status/duration only (not payloads, screenshots, recordings, or secrets)
|
||||
>
|
||||
> ```bash
|
||||
> openclaw nodes invoke --node <id> --command system.execApprovals.set --params '{"rules":[{"pattern":"powershell.exe","action":"allow"},{"pattern":"pwsh.exe","action":"allow"},{"pattern":"echo *","action":"allow"},{"pattern":"*","action":"deny"}],"defaultAction":"deny"}'
|
||||
@ -237,12 +302,35 @@ OpenClaw registers the `openclaw://` URL scheme for automation and integration:
|
||||
|
||||
| Link | Description |
|
||||
|------|-------------|
|
||||
| `openclaw://settings` | Open Settings dialog |
|
||||
| `openclaw://chat` | Open Web Chat window |
|
||||
| `openclaw://settings` | Open the Settings page |
|
||||
| `openclaw://setup` | Open Setup Wizard |
|
||||
| `openclaw://chat` | Open the Chat page |
|
||||
| `openclaw://commandcenter` | Open Command Center diagnostics |
|
||||
| `openclaw://activity` | Open the Activity page |
|
||||
| `openclaw://history` | Open the Activity page filtered to notification history |
|
||||
| `openclaw://dashboard` | Open Dashboard in browser |
|
||||
| `openclaw://dashboard/sessions` | Open specific dashboard page |
|
||||
| `openclaw://dashboard/channels` | Open Channels dashboard page |
|
||||
| `openclaw://dashboard/skills` | Open Skills dashboard page |
|
||||
| `openclaw://dashboard/cron` | Open Cron dashboard page |
|
||||
| `openclaw://healthcheck` | Run a manual health check |
|
||||
| `openclaw://check-updates` | Run a manual update check |
|
||||
| `openclaw://logs` | Open the current tray log file |
|
||||
| `openclaw://log-folder` | Open the logs folder |
|
||||
| `openclaw://config` | Open the config folder |
|
||||
| `openclaw://diagnostics` | Open the diagnostics JSONL folder |
|
||||
| `openclaw://support-context` | Copy redacted support context |
|
||||
| `openclaw://debug-bundle` | Copy a combined debug bundle for support |
|
||||
| `openclaw://browser-setup` | Copy browser.proxy/browser-control setup guidance |
|
||||
| `openclaw://port-diagnostics` | Copy gateway/browser/tunnel port diagnostics with owner PID stop hints |
|
||||
| `openclaw://capability-diagnostics` | Copy permissions, allowlist, and parity diagnostics |
|
||||
| `openclaw://node-inventory` | Copy node capabilities, commands, and policy status |
|
||||
| `openclaw://channel-summary` | Copy channel health and start/stop availability |
|
||||
| `openclaw://activity-summary` | Copy recent tray activity for troubleshooting |
|
||||
| `openclaw://extensibility-summary` | Copy channel, skills, and cron dashboard surface guidance |
|
||||
| `openclaw://restart-ssh-tunnel` | Restart the tray-managed SSH tunnel when enabled |
|
||||
| `openclaw://send?message=Hello` | Open Quick Send with pre-filled text |
|
||||
| `openclaw://agent?message=Hello` | Send message directly (with confirmation) |
|
||||
| `openclaw://agent?message=Hello` | Send message directly to the connected gateway |
|
||||
|
||||
Deep links work even when Molty is already running - they're forwarded via IPC.
|
||||
|
||||
@ -251,18 +339,38 @@ Deep links work even when Molty is already running - they're forwarded via IPC.
|
||||
PowerToys Command Palette extension for quick OpenClaw access.
|
||||
|
||||
### Commands
|
||||
- **🦞 Open Dashboard** - Launch web dashboard
|
||||
- **💬 Quick Send** - Send a message
|
||||
- **📊 Full Status** - View gateway status
|
||||
- **⚡ Sessions** - View active sessions
|
||||
- **📡 Channels** - View channel health
|
||||
- **🔄 Health Check** - Trigger health refresh
|
||||
- **🦞 Open Dashboard** - Launch the OpenClaw web dashboard
|
||||
- **💬 Dashboard: Sessions** - Open the sessions dashboard
|
||||
- **📡 Dashboard: Channels** - Open the channel configuration dashboard
|
||||
- **🧩 Dashboard: Skills** - Open the skills dashboard
|
||||
- **⏱️ Dashboard: Cron** - Open the scheduled jobs dashboard
|
||||
- **💬 Web Chat** - Open the embedded Chat page
|
||||
- **📝 Quick Send** - Open the Quick Send dialog to compose a message
|
||||
- **🧭 Setup Wizard** - Open pairing/setup
|
||||
- **🧭 Command Center** - Open diagnostics and support actions
|
||||
- **🔄 Run Health Check** - Refresh connection health
|
||||
- **⬇️ Check for Updates** - Run a manual GitHub Releases update check
|
||||
- **⚡ Activity Stream** - Open recent activity
|
||||
- **📋 Notification History** - Open notification history in the Activity page
|
||||
- **⚙️ Settings** - Open the OpenClaw Tray Settings page
|
||||
- **📄 Open Log File / 📁 Logs / 🗂️ Config / 🧪 Diagnostics** - Open support files and folders
|
||||
- **📋 Copy Support Context** - Copy redacted Command Center metadata
|
||||
- **🧰 Copy Debug Bundle** - Copy combined support, port, capability, node, channel, and activity diagnostics
|
||||
- **🌐 Copy Browser Setup** - Copy browser.proxy and node-host setup guidance
|
||||
- **🔌 Copy Port Diagnostics** - Copy gateway/browser/tunnel port owners and stop hints
|
||||
- **🛡️ Copy Capability Diagnostics** - Copy permission, allowlist, and parity diagnostics
|
||||
- **🖥️ Copy Node Inventory** - Copy node capabilities, commands, and policy status
|
||||
- **📡 Copy Channel Summary** - Copy channel health and start/stop availability
|
||||
- **⚡ Copy Activity Summary** - Copy recent tray activity
|
||||
- **🧩 Copy Extensibility Summary** - Copy channel, skills, and cron surface guidance
|
||||
- **🔁 Restart SSH Tunnel** - Restart the tray-managed SSH tunnel when enabled
|
||||
|
||||
### Installation
|
||||
1. Build the solution in Release mode
|
||||
2. Deploy the MSIX package via Visual Studio
|
||||
3. Open Command Palette (Win+Alt+Space)
|
||||
4. Type "OpenClaw" to see commands
|
||||
1. Run the OpenClaw Tray installer and tick **"Install PowerToys Command Palette extension"**, or
|
||||
2. Register manually: `Add-AppxPackage -Register "$env:LOCALAPPDATA\OpenClawTray\CommandPalette\AppxManifest.xml" -ForceApplicationShutdown`
|
||||
3. Open Command Palette (`Win+Alt+Space`) and type "OpenClaw" to see commands
|
||||
|
||||
See [docs/POWERTOYS.md](docs/POWERTOYS.md) for detailed setup and troubleshooting.
|
||||
|
||||
## 📦 OpenClaw.Shared
|
||||
|
||||
@ -286,7 +394,7 @@ openclaw-windows-node/
|
||||
│ └── OpenClaw.Tray.Tests/ # Tray app helper tests
|
||||
├── docs/
|
||||
│ └── molty1.png # Screenshot
|
||||
├── moltbot-windows-hub.slnx # Solution file (historical name)
|
||||
├── openclaw-windows-node.slnx # Solution file
|
||||
├── README.md
|
||||
├── LICENSE
|
||||
└── .gitignore
|
||||
@ -302,10 +410,16 @@ Default gateway: `ws://localhost:18789`
|
||||
|
||||
### First Run
|
||||
|
||||
On first run without a token, Molty displays a welcome dialog that:
|
||||
1. Explains what's needed to get started
|
||||
2. Links to [documentation](https://docs.molt.bot/web/dashboard) for token setup
|
||||
3. Opens Settings to configure the connection
|
||||
On first run, Molty launches a guided onboarding wizard that walks you through setup:
|
||||
|
||||
1. **Welcome** — introduces OpenClaw and starts the setup flow
|
||||
2. **Connection** — choose Local gateway, Remote gateway, or configure later. Paste a setup code or enter gateway URL and token manually. Tests the connection with Ed25519 device authentication.
|
||||
3. **Wizard** — gateway-driven configuration steps (AI provider selection, personality setup, communication channels). Steps are defined by your gateway.
|
||||
4. **Permissions** — reviews Windows system permissions (notifications, camera, microphone, screen capture, location) and links to system settings to grant them.
|
||||
5. **Chat** — meet your agent in a live chat powered by the gateway's web UI.
|
||||
6. **Ready** — summary of available features, option to launch at startup, and a Finish button.
|
||||
|
||||
For detailed setup instructions, see [docs/SETUP.md](docs/SETUP.md). For the full onboarding architecture, see [docs/ONBOARDING_WIZARD.md](docs/ONBOARDING_WIZARD.md).
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
Builds all projects, checks prerequisites, and provides clear guidance.
|
||||
|
||||
.PARAMETER Project
|
||||
Which project to build: All, Tray, WinUI, Shared, CommandPalette
|
||||
Which project to build: All, Tray, WinUI, Shared, CommandPalette, Cli, WinNodeCli
|
||||
Default: All
|
||||
|
||||
.PARAMETER Configuration
|
||||
@ -23,7 +23,7 @@
|
||||
#>
|
||||
|
||||
param(
|
||||
[ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette")]
|
||||
[ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette", "Cli", "WinNodeCli")]
|
||||
[string]$Project = "All",
|
||||
|
||||
[ValidateSet("Debug", "Release")]
|
||||
@ -187,12 +187,14 @@ function Build-Project($name, $path, $useRid = $false) {
|
||||
|
||||
$projects = @{
|
||||
"Shared" = @{ Path = "src/OpenClaw.Shared/OpenClaw.Shared.csproj"; UseRid = $false }
|
||||
"Cli" = @{ Path = "src/OpenClaw.Cli/OpenClaw.Cli.csproj"; UseRid = $false }
|
||||
"WinNodeCli" = @{ Path = "src/OpenClaw.WinNode.Cli/OpenClaw.WinNode.Cli.csproj"; UseRid = $false }
|
||||
"Tray" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true }
|
||||
"WinUI" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true }
|
||||
"CommandPalette" = @{ Path = "src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj"; UseRid = $false }
|
||||
}
|
||||
|
||||
$toBuild = if ($Project -eq "All") { @("Shared", "WinUI") } else { @($Project) }
|
||||
$toBuild = if ($Project -eq "All") { @("Shared", "Cli", "WinNodeCli", "WinUI") } else { @($Project) }
|
||||
|
||||
# Always build Shared first if building other projects
|
||||
if ($Project -ne "Shared" -and $Project -ne "All" -and $toBuild -notcontains "Shared") {
|
||||
|
||||
300
docs/A2UI_NATIVE_WINUI.md
Normal file
300
docs/A2UI_NATIVE_WINUI.md
Normal file
@ -0,0 +1,300 @@
|
||||
# Native WinUI A2UI Canvas — Design Spec
|
||||
|
||||
> **Status:** Draft / proposal
|
||||
> **Audience:** Contributors implementing a native A2UI renderer for the Windows node
|
||||
> **Target version:** A2UI v0.8 (parity with current openclaw clients), with a v0.9 migration path
|
||||
|
||||
## 1. Motivation
|
||||
|
||||
Today the Windows node renders A2UI by hosting a WebView2 control (`CanvasWindow`) that navigates to an HTTP page served by the openclaw gateway at `/__openclaw__/a2ui/`. That page bundles `@a2ui/lit` and openclaw's bridge JS. Pushed messages travel `agent → gateway → node (canvas.a2ui.push) → WebView2 → window.__a2ui.receive(msg)`.
|
||||
|
||||
That works, but it has costs:
|
||||
|
||||
- **Hard gateway dependency.** A node running in MCP-only mode (no gateway connection) silently drops A2UI pushes — `OnCanvasA2UIPush` bails when `_a2uiHostUrl` is null. The renderer code physically lives at the gateway.
|
||||
- **WebView2 surface area.** Drag/drop, IME, accessibility, focus, DPI, and keyboard shortcuts inherit WebView2 quirks instead of XAML's native behavior. The canvas always feels like an embedded browser.
|
||||
- **Bootstrapping latency.** Each cold start has to ensure WebView2 is ready, navigate, and wait for `window.__a2ui` to register before any message can be delivered (`EnsureA2UIHostAsync` + `ensureA2uiReady` polling).
|
||||
- **Theming drift.** WinUI windows around the canvas use Mica/Fluent; the canvas uses Lit components styled with CSS. Achieving consistent visuals requires duplicate theme work.
|
||||
- **Hardening.** Surface area for arbitrary script execution remains larger than necessary for what is fundamentally a declarative UI tree.
|
||||
|
||||
A native WinUI renderer renders A2UI surfaces directly into XAML — no WebView, no HTTP host, no JS bridge. The node becomes self-contained: it can render A2UI whether it's connected to a gateway, an MCP client, or both.
|
||||
|
||||
## 2. Goals & non-goals
|
||||
|
||||
### Goals
|
||||
|
||||
- **Render A2UI v0.8 standard-catalog surfaces natively** in the Windows node using WinUI 3 / XAML controls.
|
||||
- **Preserve the existing wire protocol.** Agents continue to send A2UI JSONL via `canvas.a2ui.push` / `canvas.a2ui.reset`. Nothing about the agent side changes.
|
||||
- **Work offline / gateway-less.** A WSL-less, gateway-less Windows node can still display rich UI from an MCP client.
|
||||
- **Match Fluent / WinUI design language** by default; allow theme overrides from the surface payload.
|
||||
- **Stream incremental updates** without flicker (component adds/updates/deletes mid-task).
|
||||
|
||||
### Non-goals (initial release)
|
||||
|
||||
- No A2UI v0.9 features (bidirectional messaging, prompt-first generation, modular schemas).
|
||||
- No HTML/JS/CSS escape hatch from inside an A2UI surface (the v0.8 catalog has no such primitive — keep it that way).
|
||||
- No replacement for `canvas.present` / `canvas.navigate` / `canvas.eval`. Those continue to use WebView2 for general web content. Only A2UI rendering moves.
|
||||
- No custom (non-catalog) component types in v1. Catalog-strict.
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Boundary
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ OpenClaw.Tray.WinUI (existing) │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ NodeService │ │
|
||||
│ │ CanvasCapability (existing) │ │
|
||||
│ │ ├─ canvas.a2ui.push ──► A2UIPushRequested ─┐ │ │
|
||||
│ │ └─ canvas.a2ui.reset ──► A2UIResetRequested┐│ │ │
|
||||
│ │ ││ │ │
|
||||
│ │ OnCanvasA2UIPush / OnCanvasA2UIReset (existing)││ │ │
|
||||
│ │ dispatched to UI thread, route to: ││ │ │
|
||||
│ └─────────────────────────────────────────────────││──┘ │
|
||||
│ ▼▼ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ A2UICanvasWindow (new) ─ replaces WebView2 path │ │
|
||||
│ │ ├─ A2UIRouter (parses & dispatches msgs) │ │
|
||||
│ │ ├─ SurfaceHost x N (one per createSurface) │ │
|
||||
│ │ │ └─ ComponentTree (XAML) │ │
|
||||
│ │ ├─ DataModelStore (per surface) │ │
|
||||
│ │ ├─ ActionDispatcher (UI events → ws/MCP) │ │
|
||||
│ │ └─ ThemeProvider (Fluent + payload overrides) │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
The existing `CanvasCapability` and the events it raises (`A2UIPushRequested`, `A2UIResetRequested`) are unchanged. `NodeService.OnCanvasA2UIPush` no longer calls `EnsureA2UIHostAsync` / `SendA2UIMessageAsync` against a WebView2; it instead hands the JSONL to a new `A2UICanvasWindow` (or the existing `CanvasWindow` if we choose to host both renderers).
|
||||
|
||||
### 3.2 Coexistence with WebView2 canvas
|
||||
|
||||
Two canvas modes share the surface:
|
||||
|
||||
| Mode | Trigger | Window |
|
||||
|---|---|---|
|
||||
| Web (`canvas.present` / `canvas.navigate` / `canvas.eval`) | URL or HTML payload | `CanvasWindow` (WebView2) — unchanged |
|
||||
| A2UI native | First `canvas.a2ui.push` since reset | `A2UICanvasWindow` (XAML) — new |
|
||||
|
||||
A user-visible toggle is *not* required — the choice is implicit in which MCP command the agent calls. The two windows must not compete for focus; if both want to be visible, the most-recently-targeted wins (last-write-wins, with a small fade between).
|
||||
|
||||
### 3.3 Component pipeline
|
||||
|
||||
```
|
||||
JSONL line
|
||||
→ System.Text.Json deserialize → A2UIMessage (sealed record hierarchy)
|
||||
→ A2UIRouter.Dispatch(message)
|
||||
├─ CreateSurface → SurfaceHost.Create(surfaceId, catalogId, theme)
|
||||
├─ UpdateComponents → SurfaceHost.ApplyComponents(adjacencyList)
|
||||
├─ UpdateDataModel → DataModelStore.Apply(surfaceId, patch)
|
||||
└─ DeleteSurface → SurfaceHost.Dispose(surfaceId)
|
||||
→ SurfaceHost rebuilds/patches its XAML subtree on the UI thread
|
||||
→ DataModel changes notify bound components via INotifyPropertyChanged
|
||||
```
|
||||
|
||||
Component identity is by **string ID**. A `LogicalTreeBuilder` keeps an `IDictionary<string, FrameworkElement>` per surface so `updateComponents` can mutate in place without rebuilding the entire tree (avoids flicker, preserves focus and scroll position).
|
||||
|
||||
## 4. Wire protocol
|
||||
|
||||
### 4.1 Inbound (agent → node)
|
||||
|
||||
Use the existing capability commands verbatim. No protocol change is required for this work.
|
||||
|
||||
```json
|
||||
{ "version": "v0.8", "createSurface": { "surfaceId": "main", "catalogId": "https://a2ui.org/specification/v0_8/standard_catalog.json", "sendDataModel": true } }
|
||||
{ "updateComponents": { "surfaceId": "main", "components": [ { "id": "root", "componentName": "Column", "properties": {...}, "children": ["title","actions"] }, ... ] } }
|
||||
{ "updateDataModel": { "surfaceId": "main", "patch": { "/userName": "Scott" } } }
|
||||
{ "deleteSurface": { "surfaceId": "main" } }
|
||||
```
|
||||
|
||||
The renderer SHOULD validate each line against the v0.8 envelope schema before dispatch. The schema lives at `vendor/a2ui/specification/0.8/json/server_to_client.json` in the openclaw repo and should be vendored into `OpenClaw.Shared/Schemas/A2UI_v0_8/`.
|
||||
|
||||
Unknown envelope keys → log + skip (do not throw). Unknown component types → render an `A2UIUnknown` placeholder showing the type name and a warning glyph; never crash.
|
||||
|
||||
### 4.2 Outbound (node → agent)
|
||||
|
||||
When the user interacts with a surface, the renderer raises an A2UI **action** event. Action delivery rides whichever transport the node is currently connected through:
|
||||
|
||||
- Gateway-connected: serialize as the v0.8 client→server envelope and ship over the existing WebSocket via `_nodeClient`.
|
||||
- MCP-only: emit as an MCP notification on a new `canvas/a2ui/action` channel (to be added to `CanvasCapability`).
|
||||
|
||||
Action payload shape (v0.8):
|
||||
|
||||
```json
|
||||
{
|
||||
"action": {
|
||||
"name": "primary",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn_submit",
|
||||
"timestamp": "2026-04-25T18:32:11.123Z",
|
||||
"context": { "/email": "user@example.com" }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`context` is the (possibly partial) data model snapshot relevant to the source component, computed by walking the component's `dataBinding` paths and honoring the surface's `sendDataModel` flag.
|
||||
|
||||
## 5. Component mapping (v0.8 standard catalog)
|
||||
|
||||
| A2UI component | WinUI 3 control | Notes |
|
||||
|---|---|---|
|
||||
| **Containers** | | |
|
||||
| `Row` | `StackPanel` (Horizontal) inside a wrap-aware `ItemsRepeater` when `wrap=true` | Match `bootstrap.js`'s wrap behavior at < 860 px |
|
||||
| `Column` | `StackPanel` (Vertical) | `min-width: 0` analog: clamp via `MinWidth=0` |
|
||||
| `List` | `ItemsRepeater` + `ItemsRepeaterScrollHost` | Virtualization on by default |
|
||||
| `Card` | `Border` with `Microsoft.UI.Xaml.Media.MicaBackdrop`-aware brush + corner radius + drop shadow | |
|
||||
| `Tabs` | `TabView` (controls) | Lightweight chrome to match Lit version |
|
||||
| `Modal` | `ContentDialog` (or full-window overlay `Grid` w/ `AcrylicBrush`) | Track Lit's full-screen overlay style — `dialog::backdrop` analog is `AcrylicBrush` over the parent |
|
||||
| **Display** | | |
|
||||
| `Text` | `TextBlock` | Map A2UI `style` (h1/h2/body/caption/etc.) to Fluent type ramp |
|
||||
| `Image` | `Image` w/ `BitmapImage` source; HTTP fetch via `HttpClient` with allowlist | Reject `file:`, `javascript:`, `data:` (except small `image/png`, `image/jpeg`, `image/webp`) |
|
||||
| `Icon` | `FontIcon` (Segoe Fluent Icons) keyed by name | Maintain a name→glyph map; missing → outlined question-mark |
|
||||
| `Video` | `MediaPlayerElement` | |
|
||||
| `AudioPlayer` | `MediaPlayerElement` w/ audio-only template | |
|
||||
| `Divider` | `Rectangle` (1px `SystemControlForegroundBaseLowBrush`) or `MenuFlyoutSeparator` style | |
|
||||
| **Interactive** | | |
|
||||
| `Button` | `Button` (variants → `AccentButtonStyle`, `DefaultButtonStyle`) | Triggers action with `name` |
|
||||
| `CheckBox` | `CheckBox` | Two-way bind to data model path |
|
||||
| `TextField` | `TextBox` (multiline → `TextBox.AcceptsReturn=true`) | `inputType` → `InputScope` mapping |
|
||||
| `DateTimeInput` | `CalendarDatePicker` + `TimePicker` (composed) | |
|
||||
| `ChoicePicker` | `ComboBox` (single) / `ListView` w/ `SelectionMode=Multiple` (multi) | |
|
||||
| `Slider` | `Slider` | |
|
||||
|
||||
Each mapping lives in a single `IComponentRenderer` implementation under `OpenClaw.Tray.WinUI/A2UI/Renderers/`. The set is closed at compile time (catalog-strict) — no runtime registration in v1.
|
||||
|
||||
## 6. Data model & binding
|
||||
|
||||
A2UI surfaces carry a JSON data model. Components reference paths into that model (`"/userName"`, `"/items/0/title"`).
|
||||
|
||||
### 6.1 Storage
|
||||
|
||||
`DataModelStore` holds one `JsonObject` per surface, mutated via JSON Pointer (RFC 6901) patches. Use `System.Text.Json.Nodes` for in-place edits (already a dependency).
|
||||
|
||||
### 6.2 Binding
|
||||
|
||||
Bindings are **one-way for display** components, **two-way for interactive** components. Implement via:
|
||||
|
||||
- `DataModelObservable` — wraps a `JsonObject` and exposes `INotifyPropertyChanged` per registered path.
|
||||
- `A2UIBinding` markup extension (or code-behind helpers) — produces `Binding` objects that target a path observer.
|
||||
|
||||
Why not raw `Microsoft.UI.Xaml.Data.Binding` paths? JSON paths can include array indices and slashes, which XAML binding paths don't model cleanly. A small adapter is simpler and faster than fighting the binding engine.
|
||||
|
||||
### 6.3 Patches
|
||||
|
||||
`updateDataModel.patch` is an object whose keys are JSON Pointer paths and whose values are replacement values. Apply atomically; coalesce notifications so multiple paths in one message produce a single render pass.
|
||||
|
||||
## 7. Action dispatch
|
||||
|
||||
Components that can produce actions register a callback:
|
||||
|
||||
```csharp
|
||||
internal sealed class ButtonRenderer : IComponentRenderer
|
||||
{
|
||||
public FrameworkElement Render(A2UIComponent c, RenderContext ctx)
|
||||
{
|
||||
var btn = new Button { Content = c.GetText("label") };
|
||||
btn.Click += (_, _) => ctx.Actions.Raise(new A2UIAction(
|
||||
name: c.GetString("actionName") ?? "press",
|
||||
surfaceId: ctx.SurfaceId,
|
||||
sourceComponentId: c.Id,
|
||||
context: ctx.DataModel.SnapshotFor(c)));
|
||||
return btn;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`ActionDispatcher.Raise` is the single seam through which actions leave the renderer. It handles:
|
||||
|
||||
1. Throttle/debounce (per `name` + `sourceComponentId`) to suppress double-clicks.
|
||||
2. Serialization to A2UI v0.8 client→server envelope.
|
||||
3. Routing: gateway WS first, then MCP notification, with a fallback queue if neither is available.
|
||||
|
||||
## 8. Theming
|
||||
|
||||
Default to `XamlControlsResources` + Fluent theme colors. The `createSurface.theme` payload may override:
|
||||
|
||||
- `colors`: map onto `ThemeResource` overrides applied to the `SurfaceHost` resource scope (no global mutation).
|
||||
- `typography`: optional font family override; respect Windows accessibility text scaling first.
|
||||
- `radius`, `spacing`: passed through to renderers via `RenderContext`.
|
||||
|
||||
Theme application is local to the surface's visual tree — switching themes between surfaces does not flash the chrome.
|
||||
|
||||
## 9. Lifecycle & hosting
|
||||
|
||||
### 9.1 Window
|
||||
|
||||
`A2UICanvasWindow` extends `Window`:
|
||||
|
||||
- One window total (singleton). Multiple surfaces stack as `TabView` items when more than one is active; a single surface fills the content area.
|
||||
- Title pulls from `createSurface.title` (new optional v0.8 field already used by openclaw) or defaults to "Canvas".
|
||||
- Window chrome: backdrop = `MicaBackdrop` on Win11, `DesktopAcrylicBackdrop` fallback.
|
||||
- Persistence: position/size remembered across sessions (per existing `CanvasWindow` settings keys; reuse where possible).
|
||||
|
||||
### 9.2 Threading
|
||||
|
||||
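All renderer mutation runs on the UI thread's `DispatcherQueue`, captured once on the UI thread (e.g. `DispatcherQueue.GetForCurrentThread()` during window construction, or `Window.DispatcherQueue`). The router accepts pushes from any thread and posts to that captured queue via `TryEnqueue`; it must not call `GetForCurrentThread()` on the calling thread, because that returns null off the UI thread.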
|
||||
|
||||
### 9.3 Reset
|
||||
|
||||
`canvas.a2ui.reset` (already wired through `A2UIResetRequested`) → `A2UIRouter.ResetAll()` → dispose every `SurfaceHost`, clear stores, re-show empty placeholder.
|
||||
|
||||
## 10. Security model
|
||||
|
||||
- **Catalog-strict.** Component types are baked in at compile time. There is no JS, no HTML escape, no `eval`. Unknown types render a placeholder.
|
||||
- **URL allowlist for media.** `Image`, `Video`, `AudioPlayer` URL fetches go through a single `MediaResolver` that:
|
||||
- Allows `https://` from a configurable allowlist (default: empty until set by the agent's surface theme/manifest).
|
||||
- Allows `data:image/png|jpeg|webp` up to 2 MiB.
|
||||
- Rejects everything else; renders broken-image glyph.
|
||||
- **Action context scoping.** `context` includes only data model paths the source component declares it reads (`dataBinding`), preventing accidental leak of unrelated form state.
|
||||
- **No file system or process access** from inside a surface. Those go through other capabilities (`system.run`, `screen.*`) which already have their own approval flow.
|
||||
- **Logging.** Each inbound message is logged at Info with surface ID + component count; PII fields in the data model SHOULD be redacted at log time using a path denylist (`/password`, `/secret*`, `/token`).
|
||||
|
||||
## 11. Telemetry
|
||||
|
||||
Mirror what `CanvasCapability` already logs:
|
||||
|
||||
- `a2ui.push` (count, jsonl byte length, surface IDs touched, render time ms)
|
||||
- `a2ui.action` (surface ID, action name, queue latency)
|
||||
- `a2ui.unknown_component` (type name) — to drive catalog upgrades
|
||||
- `a2ui.media_blocked` (URL scheme/host) — to tune the allowlist
|
||||
|
||||
## 12. Testing
|
||||
|
||||
- **Unit:** schema validation, JSON pointer apply, action serialization, component-to-XAML mapping per type.
|
||||
- **Visual regression:** golden images per component using WinAppDriver or a snapshot harness — gate on hash + tolerance.
|
||||
- **Spec conformance:** drive the renderer with the official v0.8 conformance fixtures from `vendor/a2ui/specification/0.8/eval/` (reused from the openclaw monorepo) and assert action outputs match expected.
|
||||
- **Stress:** 10k component surface, 1k updateComponents/sec → renderer must not block the UI thread > 16 ms p95.
|
||||
- **Parity:** record the JSONL stream of an existing Lit-rendered openclaw surface, replay through the WinUI renderer, diff screenshots.
|
||||
|
||||
## 13. Phasing
|
||||
|
||||
| Phase | Scope | Exit criteria |
|
||||
|---|---|---|
|
||||
| **0 — Spike** | `Text`, `Column`, `Button` only; one surface; no data model | Single button click round-trips to agent |
|
||||
| **1 — Catalog parity** | All v0.8 standard catalog types; data model + bindings; modal/tabs | Full conformance fixtures pass |
|
||||
| **2 — Polish** | Theming, transitions, focus management, accessibility (Narrator), keyboard nav | A11y audit clean; UX review against Lit version |
|
||||
| **3 — Coexistence** | Native window default; WebView2 path retained behind `--canvas=web` flag for parity testing | No regressions in WebView2 path |
|
||||
| **4 — v0.9 migration** | Bidirectional messages, modular schemas, prompt-first | Tracks Google A2UI v0.9 release |
|
||||
|
||||
## 14. Open questions
|
||||
|
||||
> Resolved 2026-04-27 — see decisions below; previous wording preserved for context.
|
||||
|
||||
1. **Window count.** One A2UI window with tabs for multiple surfaces, or one window per surface? Lit version uses one host with multiple stacked surfaces.
|
||||
**Decision:** stay with the Lit-compatible single-window-with-tabs layout. Multiple windows is out of scope for v1.
|
||||
2. **Component overrides.** Should we expose a hook for downstream apps to swap in custom renderers?
|
||||
**Decision:** stay catalog-strict for v1. No extension seam yet — easy to add later if a real customer asks.
|
||||
3. **Theme negotiation.** Should the agent be told "I'm a native WinUI client, prefer Fluent tokens" via `clientCapabilities`?
|
||||
**Decision:** yes — advertise Fluent token preference in `clientCapabilities`. (Tracking task: wire this into the capability summary returned by `canvas.caps`.)
|
||||
4. **Animation budget.** Define a small transition set (fade, slide) and apply automatically, or stay still?
|
||||
**Decision:** stay still until the agent asks. No automatic transitions in v1.
|
||||
5. **Image caching.** Per-surface, per-process, or persistent?
|
||||
**Decision:** per-process LRU. Avoids the repeated-fetch cost of per-surface and the staleness risk of persistent disk caching.
|
||||
|
||||
## 15. References
|
||||
|
||||
- A2UI v0.8 spec: <https://a2ui.org/specification/v0.8-a2ui/>
|
||||
- v0.8 JSON schemas (vendored): `openclaw/vendor/a2ui/specification/0.8/json/`
|
||||
- Reference Lit renderer: `openclaw/vendor/a2ui/renderers/lit/`
|
||||
- Current Windows node A2UI bridge: `src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs` (`EnsureA2UIHostAsync`, `BuildA2UIMessageScript`)
|
||||
- Current capability surface: `src/OpenClaw.Shared/Capabilities/CanvasCapability.cs`
|
||||
- Android handler (good reference for v0.8 validation rules): `openclaw/apps/android/.../A2UIHandler.kt`
|
||||
@ -7,7 +7,10 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa
|
||||
| Language | Locale | Resource File |
|
||||
|----------|--------|---------------|
|
||||
| English (US) | `en-us` | `Strings/en-us/Resources.resw` |
|
||||
| French (France) | `fr-fr` | `Strings/fr-fr/Resources.resw` |
|
||||
| Dutch (Netherlands) | `nl-nl` | `Strings/nl-nl/Resources.resw` |
|
||||
| Chinese (Simplified) | `zh-cn` | `Strings/zh-cn/Resources.resw` |
|
||||
| Chinese (Traditional) | `zh-tw` | `Strings/zh-tw/Resources.resw` |
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
@ -43,7 +46,7 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa
|
||||
|
||||
5. **Do not translate resource key names** (the `name` attribute). Only translate `<value>` content.
|
||||
|
||||
6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system automatically discovers new locale folders.
|
||||
6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system and localization tests automatically discover new locale folders.
|
||||
|
||||
## How It Works
|
||||
|
||||
@ -65,16 +68,16 @@ Windows picks the language automatically based on the user's OS display language
|
||||
|
||||
## Testing a Language Locally
|
||||
|
||||
To test a specific locale without changing your Windows language:
|
||||
Set the `OPENCLAW_LANGUAGE` environment variable before launching the app:
|
||||
|
||||
1. Open `src/OpenClaw.Tray.WinUI/App.xaml.cs`
|
||||
2. Add this line at the top of the `App()` constructor, **before** `InitializeComponent()`:
|
||||
```csharp
|
||||
LocalizationHelper.SetLanguageOverride("zh-CN");
|
||||
```
|
||||
3. Build and run (`dotnet build src/OpenClaw.Tray.WinUI -r win-x64`). Remove the line when done testing.
|
||||
```powershell
|
||||
$env:OPENCLAW_LANGUAGE = "fr-fr" # or nl-nl, zh-cn, zh-tw
|
||||
.\src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\win-x64\OpenClaw.Tray.WinUI.exe
|
||||
```
|
||||
|
||||
> **Note:** This overrides `LocalizationHelper.GetString()` calls (menus, toasts, dialogs, window titles). XAML `x:Uid` bindings follow the OS display language. For full XAML localization testing, change your Windows display language in Settings → Time & Language.
|
||||
This overrides `LocalizationHelper.GetString()` calls for menus, toasts, dialogs, and the onboarding wizard. The language is validated against the supported locale list.
|
||||
|
||||
> **Note:** XAML `x:Uid` bindings follow the OS display language. For full localization testing including XAML elements, change your Windows display language in Settings → Time & Language.
|
||||
|
||||
## Resource Key Naming Conventions
|
||||
|
||||
@ -87,12 +90,31 @@ To test a specific locale without changing your Windows language:
|
||||
| `Status_Name` | Status display text | `Status_Connected` |
|
||||
| `TimeAgo_Format` | Relative time strings | `TimeAgo_MinutesFormat` |
|
||||
|
||||
### Onboarding Key Namespace
|
||||
|
||||
All onboarding wizard strings use the `Onboarding_` prefix:
|
||||
|
||||
| Pattern | Used For | Example |
|
||||
|---------|----------|---------|
|
||||
| `Onboarding_PageName_Label` | Page titles, descriptions | `Onboarding_Welcome_Title` |
|
||||
| `Onboarding_Connection_*` | Connection page labels/status | `Onboarding_Connection_TestConnection` |
|
||||
| `Onboarding_Perm_*` | Permission names | `Onboarding_Perm_Camera` |
|
||||
| `Onboarding_Ready_*` | Ready page elements | `Onboarding_Ready_Feature_Voice_Subtitle` |
|
||||
| `Onboarding_Wizard_*` | Wizard page elements | `Onboarding_Wizard_Continue` |
|
||||
|
||||
## Validation
|
||||
|
||||
Both resource files must have the **same set of keys**. You can verify with:
|
||||
All resource files must have the **same set of keys**. Locale directories are discovered dynamically under `Strings/`, so adding a new `Strings/<locale>/Resources.resw` file automatically brings it under validation. You can verify counts with:
|
||||
|
||||
```powershell
|
||||
$en = (Select-String -Path "src\OpenClaw.Tray.WinUI\Strings\en-us\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
$new = (Select-String -Path "src\OpenClaw.Tray.WinUI\Strings\<locale>\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
Write-Host "en-us: $en keys | <locale>: $new keys | Match: $($en -eq $new)"
|
||||
$base = "src\OpenClaw.Tray.WinUI\Strings"
|
||||
Get-ChildItem $base -Directory | ForEach-Object {
|
||||
$loc = $_.Name
|
||||
$count = (Select-String -Path "$base\$loc\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
Write-Host "$loc : $count keys"
|
||||
}
|
||||
```
|
||||
|
||||
All locale counts should match. Missing or extra keys indicate an incomplete translation.
|
||||
|
||||
Non-English resource values must also follow the all-or-none rule enforced by `LocalizationValidationTests`: each key is either translated in every non-English locale, intentionally invariant in every non-English locale, or explicitly deferred with rationale. Partial translation, where only some non-English locales differ from `en-us`, is treated as a regression.
|
||||
|
||||
279
docs/MCP_MODE.md
Normal file
279
docs/MCP_MODE.md
Normal file
@ -0,0 +1,279 @@
|
||||
# Local MCP Mode
|
||||
|
||||
**Status:** Implemented (initial cut). See `src/OpenClaw.Shared/Mcp/`, `src/OpenClaw.Shared/Mcp/McpHttpServer.cs`, and the Settings UI MCP section.
|
||||
|
||||
## Summary
|
||||
|
||||
The Windows tray app now ships a **local Model Context Protocol (MCP) server** alongside its existing OpenClaw gateway client. The same node capabilities the agent reaches over the OpenClaw gateway WebSocket — `system.run`, `screen.snapshot`, `canvas.*`, `camera.list`, `camera.snap`, `camera.clip`, `location.get`, `tts.speak`, `system.notify`, `system.execApprovals.*` — are advertised, on the same machine, as MCP tools over `http://127.0.0.1:8765/`.
|
||||
|
||||
This means any local MCP client (Claude Desktop, Claude Code, Cursor, an MCP-aware CLI, a custom dev script) can reach into the running tray and drive Windows-native capabilities directly, without an OpenClaw gateway in the loop. The tray app can run in **MCP-only mode** with no gateway connection at all.
|
||||
|
||||
The implementation is structured so that **adding a new node capability automatically exposes it via MCP** — no MCP-side code changes required. That is the central design constraint and the main reason we built MCP in-process rather than as a separate adapter.
|
||||
|
||||
## Goals
|
||||
|
||||
1. **Single source of truth for capabilities.** A new `INodeCapability` registered with `WindowsNodeClient.RegisterCapability(...)` is reachable via every transport the tray supports. Today: gateway WebSocket and local MCP HTTP. Future transports (named pipe, gRPC, whatever) plug in the same way.
|
||||
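2. **Local-first development.** Capabilities can be exercised on Windows without standing up an OpenClaw gateway, without an account, without gateway auth, without a tunnel. (The local endpoint is still guarded by a bearer token; see [Authentication](#authentication).)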
|
||||
3. **Make MCP clients first-class consumers** of the OpenClaw native node, not afterthoughts. The tooling investment in capabilities (camera consent flows, exec approval policy, canvas WebView2 plumbing) pays off in both directions: agent-via-gateway and agent-via-local-MCP.
|
||||
|
||||
## Non-goals (for this iteration)
|
||||
|
||||
- **No remote authentication.** Loopback bind + Origin/Host checks keep the endpoint unreachable from any other machine. A local bearer token guards against untrusted local processes on the same box (see [Authentication](#authentication) below). We will revisit ACLs / multi-user when we want remote MCP, multiple users on one box, or shared dev VMs.
|
||||
- **No SSE / streaming.** Plain JSON-RPC request/response is enough for the synchronous capabilities we have today.
|
||||
- **No per-tool input schemas.** Capabilities don't expose schemas; MCP `inputSchema` is permissive (`{type: "object", additionalProperties: true}`). When/if `INodeCapability` grows a schema property, the MCP bridge picks it up with no other changes.
|
||||
- **No port configuration UI.** Default `8765` is hardcoded. Easy to lift into `SettingsManager` later.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Single capability registry, two transports
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ NodeService │
|
||||
│ │
|
||||
│ List<INodeCapability> _capabilities ◄───┐ │
|
||||
│ │ │
|
||||
│ private void Register(INodeCapability) │ │
|
||||
│ { │ │
|
||||
│ _capabilities.Add(cap); │ │
|
||||
│ _nodeClient?.RegisterCapability(cap)│ │
|
||||
│ } │ │
|
||||
└────┬───────────────────────┬──────────────┘─┘
|
||||
│ │
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────────┐ ┌─────────────────────┐
|
||||
│ WindowsNodeClient │ │ McpToolBridge │
|
||||
│ (gateway WebSocket) │ │ (JSON-RPC dispatch) │
|
||||
└─────────┬───────────┘ └─────────┬───────────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
OpenClaw gateway McpHttpServer
|
||||
(HttpListener@127.0.0.1:8765)
|
||||
│
|
||||
▼
|
||||
Local MCP clients
|
||||
(Claude Code, Cursor, etc.)
|
||||
```
|
||||
|
||||
The capability list lives on `NodeService`, *not* on `WindowsNodeClient`. That single change is what makes MCP-only mode possible: the gateway client is now optional. When it exists, `Register(cap)` pushes capabilities into both the local list and the gateway client's registration message. When it doesn't (MCP-only), capabilities still populate the local list and the MCP bridge serves them.
|
||||
|
||||
### MCP bridge
|
||||
|
||||
`OpenClaw.Shared/Mcp/McpToolBridge.cs` is transport-agnostic JSON-RPC 2.0. It implements:
|
||||
|
||||
- `initialize` — protocol version `2024-11-05`, server info.
|
||||
- `tools/list` — flattens `_capabilities` into MCP tools. Tool name = command name (`"screen.snapshot"`); description = `"{category} capability: {command}"`; `inputSchema` is permissive.
|
||||
- `tools/call` — finds the capability via `INodeCapability.CanHandle(name)`, builds a `NodeInvokeRequest` (the same struct the gateway path uses), calls `ExecuteAsync`, wraps the result as MCP `content[].text`. Tool failures come back as `result.isError = true`, not JSON-RPC errors (per MCP spec — JSON-RPC errors are reserved for protocol issues).
|
||||
- `ping`, `notifications/initialized` — protocol housekeeping.
|
||||
|
||||
The bridge takes a `Func<IReadOnlyList<INodeCapability>>` rather than a snapshot. Every `tools/list` re-reads the live list. This is what guarantees zero-cost capability addition — register a new capability after server start and it appears on the next `tools/list`.
|
||||
|
||||
### HTTP transport
|
||||
|
||||
`OpenClaw.Shared/Mcp/McpHttpServer.cs` is a `System.Net.HttpListener` bound to `http://127.0.0.1:8765/`. Loopback-only by construction; not reachable from any other machine even with firewall holes. A defensive `IPAddress.IsLoopback` check on each request acts as belt-and-suspenders.
|
||||
|
||||
`GET /` returns a friendly text probe. `POST /` is JSON-RPC. Anything else → `405`. When a bearer token is configured, every verb must pass the token gate before method dispatch.
|
||||
|
||||
## Authentication
|
||||
|
||||
The HTTP transport requires a bearer token on every request. Defense-in-depth on top of loopback bind + Origin/Host checks: if an attacker can run code in *any* local user context they can reach `127.0.0.1:8765`, so we don't want the listener to be open-by-construction.
|
||||
|
||||
**Where the token lives.** `%APPDATA%\OpenClawTray\mcp-token.txt`. The exact path is composed by `NodeService.McpTokenPath` from `SettingsManager.SettingsDirectoryPath`, so the test-suite override `OPENCLAW_TRAY_DATA_DIR` isolates the token file too. The file inherits the parent directory's ACL — by default only the current user (and SYSTEM/Administrators) can read it.
|
||||
|
||||
**When it's created.** Lazily, on the first `NodeService.StartMcpServer()` call — i.e. the first time the user enables Local MCP Server in Settings and saves. **Until that toggle has been on at least once, the file does not exist.** This trips up users who try to grab the token before flipping the switch.
|
||||
|
||||
**How long it is.** 32 bytes of CSPRNG output, base64url-encoded with padding stripped → **43 ASCII characters** (~256 bits of entropy). See `McpAuthToken.Generate()`.
|
||||
|
||||
**Lifetime.** The token is **persistent across tray restarts**. It's only regenerated if the file is deleted or its contents are emptied. There is no automatic rotation.
|
||||
|
||||
**On the wire.** Every request must carry `Authorization: Bearer <token>` when the server has a configured token. Missing or wrong token → `401 Unauthorized` with no body. `GET /` remains a "yes I'm here" probe after auth passes.
|
||||
|
||||
**How users find it.** Settings → Developer Mode → MCP section shows the live token (masked, with Reveal/Copy buttons) and the storage path. For agents that read from disk (Claude Code, custom scripts), pointing them at `McpTokenPath` is preferable to embedding the token in their prompt or config — the path is stable, the token is a secret. For agents that only accept literal bearer values in config (Claude Desktop, Cursor), use Copy.
|
||||
|
||||
### Settings model
|
||||
|
||||
Two independent toggles in `SettingsData`:
|
||||
|
||||
```csharp
|
||||
public bool EnableNodeMode { get; set; } // open WebSocket to gateway
|
||||
public bool EnableMcpServer { get; set; } // run local MCP HTTP server
|
||||
```
|
||||
|
||||
| `EnableNodeMode` | `EnableMcpServer` | Result |
|
||||
|---|---|---|
|
||||
| off | off | Operator-only (legacy default) |
|
||||
| off | on | **MCP server only, no gateway** |
|
||||
| on | off | Gateway node, no MCP |
|
||||
| on | on | Gateway node + MCP |
|
||||
|
||||
Settings UI exposes both toggles in the Advanced section, with the live MCP endpoint URL and current status (`Listening` / `Stopped — save and restart to start` / `Disabled`).
|
||||
|
||||
A legacy `McpOnlyMode` field is migrated automatically on load and never re-written.
|
||||
|
||||
## Why this matters
|
||||
|
||||
### Testing
|
||||
|
||||
The tray's most interesting code lives in capabilities — `system.run` (LocalCommandRunner + ExecApprovalPolicy), `screen.snapshot` (Windows.Graphics.Capture + GraphicsCapturePicker), `canvas.*` (WebView2 with trusted origin enforcement), `camera.snap`/`camera.clip` (MediaCapture + consent prompt), `location.get` (Windows.Devices.Geolocation). All of that has nontrivial Windows-only behavior and almost none of it is currently exercised end-to-end without first standing up a gateway and authenticating.
|
||||
|
||||
Local MCP changes that. Concrete benefits:
|
||||
|
||||
- **Manual smoke tests in seconds.** `curl -s -X POST http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'` validates that the capability dispatch path works, the WinUI dispatcher marshaling is correct, and the result shape matches expectations. No gateway, no gateway token, no SSH tunnel — only the local MCP bearer token.
|
||||
- **Reproducible bug reports.** A repro becomes a `tools/call` body the bug filer can paste verbatim. No "what was the gateway doing at the time."
|
||||
- **Integration tests against a real instance.** A future `tests/integration/` project can spin up the tray in MCP-only mode, fire JSON-RPC, assert results. The same test bodies a developer runs by hand are the same ones CI runs. (Harnessing WinUI itself in CI is harder, but the bridge logic — `McpToolBridge` — is already covered by `McpToolBridgeTests` with no UI involvement.)
|
||||
- **Coverage for the dispatch path itself.** `WindowsNodeClient`'s capability-routing logic (`CanHandle` → `ExecuteAsync`) was previously only exercised against a live gateway. The MCP server hits the same code paths, so any local MCP test is implicit coverage of the gateway dispatch.
|
||||
- **Bridge unit tests already exist.** `tests/OpenClaw.Shared.Tests/McpToolBridgeTests.cs` (9 cases) covers initialize, tools/list, runtime capability registration, tool calls, unknown tools, capability failures, JSON-RPC unknown method, notifications, and parse errors. These are pure C# unit tests with fake capabilities — no HTTP, no UI, no gateway.
|
||||
|
||||
### Access from CLIs and agents
|
||||
|
||||
The exact same node tools the OpenClaw gateway uses are now invocable by any local MCP-aware client:
|
||||
|
||||
- **Claude Code** (this CLI). Add to `~/.claude.json` or per-project `.mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"openclaw-tray": {
|
||||
"type": "http",
|
||||
"url": "http://127.0.0.1:8765/"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The agent then sees `screen.snapshot`, `system.run`, `canvas.*`, etc. as tools, with whatever arguments the capability accepts.
|
||||
|
||||
- **Claude Desktop.** Same config shape under MCP servers.
|
||||
- **Cursor.** Same.
|
||||
- **GitHub Copilot CLI / Copilot in the terminal.** As MCP support lands in those clients, the endpoint is already there.
|
||||
- **Custom dev scripts.** Anything that can speak HTTP + JSON-RPC. A 30-line Python or Node helper can drive the entire capability surface.
|
||||
|
||||
In all cases the user gets a Windows-native agent experience without OpenClaw infrastructure. They can be entirely offline w.r.t. an OpenClaw gateway and still hand the LLM a working set of "do something on my Windows box" tools.
|
||||
|
||||
### Dev acceleration when building new features
|
||||
|
||||
This is the strongest argument for making MCP a first-class citizen, not an afterthought.
|
||||
|
||||
When a contributor adds a new capability — say, `clipboard.read`, `clipboard.write`, `windows.list`, `audio.transcribe`, `git.status`, `office.draft_email` — today the workflow looks like:
|
||||
|
||||
1. Implement `INodeCapability`.
|
||||
2. Wire it into `NodeService.RegisterCapabilities()`.
|
||||
3. Stand up a gateway, authenticate, pair the device, etc., to test.
|
||||
4. Drive the capability from within an agent conversation, observing logs and taking screenshots to confirm correctness.
|
||||
|
||||
With MCP in-process the workflow shortens to:
|
||||
|
||||
1. Implement `INodeCapability`.
|
||||
2. Wire it into `NodeService.RegisterCapabilities()`.
|
||||
3. Restart the tray. The new tool is *immediately* visible to any local MCP client (`tools/list` re-reads the registry every call), and to manual `curl` tests.
|
||||
|
||||
The dev loop for capabilities is now identical to the dev loop for any local HTTP server: edit, restart, hit the endpoint, observe. No gateway, no agent, no gateway authentication — only the local bearer token.
|
||||
|
||||
This compounds when you stack it with Claude Code or Cursor on the same machine. A contributor can:
|
||||
|
||||
- Open the repo in their IDE.
|
||||
- Run the tray with `EnableMcpServer = true`.
|
||||
- Have Claude Code connected to the same MCP endpoint.
|
||||
- Iterate on a new capability while the agent — using that very capability — helps drive the iteration. The capability under development can be invoked by the assistant on the next turn after a tray restart. That's a tight self-hosted feedback loop.
|
||||
|
||||
It also reduces the cost of "speculative" capabilities. Today, adding a capability has a tax: it must be useful enough to justify the extra surface in the gateway/agent stack. With local MCP, a contributor can build a capability speculatively, validate it against their own MCP-aware agent, and only later decide whether to formalize it for gateway use. That lowers the bar for experimentation.
|
||||
|
||||
## Security model
|
||||
|
||||
The server is built on **multiple** defensive layers, not just one. Loopback alone is *not* sufficient — a browser tab the user opens is also on the loopback interface, so a malicious page could otherwise reach `http://127.0.0.1:8765/` directly.
|
||||
|
||||
1. **Loopback bind.** `HttpListener` is registered with the prefix `http://127.0.0.1:8765/`. The Windows kernel binds the listening socket to the loopback interface only — packets from other interfaces are not delivered to it. Firewall configuration is irrelevant. Defends against: another machine on the network.
|
||||
2. **Defensive `IsLoopback` check.** Each incoming request validates `ctx.Request.RemoteEndPoint.Address`. Belt-and-suspenders for #1.
|
||||
3. **CSRF / browser gate.** Each request is rejected if any of the following holds:
|
||||
- the request carries an `Origin` header (real MCP clients — Claude Desktop, Cursor, Claude Code, curl — never send `Origin`; browsers always do for cross-origin fetches);
|
||||
- the `Host` header is anything other than `127.0.0.1[:port]` or `localhost[:port]` (defends against DNS-rebinding pivots);
|
||||
- on `POST`, the `Content-Type` is anything other than `application/json` (forces a CORS preflight from a browser, which we never satisfy);
|
||||
- the request body exceeds 4 MiB (DoS / OOM cap).
|
||||
|
||||
Together the first three checks (`Origin`, `Host`, `Content-Type`) force a malicious cross-origin browser fetch into a CORS preflight that we deliberately do not honor (no `Access-Control-Allow-*` is ever emitted), so the actual call is blocked before reaching capability code.
|
||||
4. **Concurrency cap.** A semaphore limits in-flight handlers to 8. A misbehaving local client cannot pin every threadpool thread on long-running screen/camera calls.
|
||||
5. **Capability-level controls remain in force.** `SystemCapability.SetApprovalPolicy(...)` (the exec approval policy) still gates `system.run`. Camera and screen capture still go through Windows consent flows. MCP doesn't bypass any of those.
|
||||
|
||||
**Authentication is a bearer token, not a sandbox.** Every request must carry the tray-issued bearer token (see [Authentication](#authentication)), but any user-context local process that can read the token file, and has a TCP socket and the port number, can still drive any capability. This is the same trust boundary as anything that runs as the user — a malicious process on the box could already invoke arbitrary Win32 APIs without going through MCP. We don't try to stop same-user processes from talking to MCP. For the cases where local callers do matter (multi-user shared boxes, low-trust local processes), the answer is the bearer token issued by the tray (copyable from the Settings UI), not URL ACLs or HTTPS — both add deployment pain without solving the actual problem.
|
||||
|
||||
### Verifying the gate
|
||||
|
||||
These should all be **rejected** with `403 Forbidden`:
|
||||
|
||||
```powershell
|
||||
# Browser pretending to come from another origin
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Origin: https://evil.com" -H "Content-Type: application/json" -d '{}'
|
||||
|
||||
# DNS rebinding attempt
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Host: evil.com" -H "Content-Type: application/json" -d '{}'
|
||||
```
|
||||
|
||||
This should be **rejected** with `415`:
|
||||
|
||||
```powershell
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Content-Type: text/plain" --data '{"jsonrpc":"2.0","id":1,"method":"ping"}'
|
||||
```
|
||||
|
||||
These should **succeed**:
|
||||
|
||||
```powershell
|
||||
curl http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" # GET probe
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1,"method":"ping"}'
|
||||
```
|
||||
|
||||
## What's deliberately deferred
|
||||
|
||||
These are reasonable next steps but explicitly out of scope for the initial implementation:
|
||||
|
||||
1. **Per-tool input schemas.** Add an `IReadOnlyDictionary<string, JsonElement> InputSchemas` (or per-command descriptor) to `INodeCapability`. The MCP bridge's `HandleToolsList` picks them up automatically. Until then, MCP clients see permissive schemas and the agent has to figure out arg shapes from descriptions and trial-and-error.
|
||||
2. ~~**Authentication.**~~ Implemented. See [Authentication](#authentication) above.
|
||||
3. **Streamable HTTP / SSE.** For long-running tools (`screen.record`, future `audio.transcribe`), MCP supports streaming progress. The bridge needs to learn about it and the HTTP server needs to optionally upgrade.
|
||||
4. **Resource and prompt support.** MCP has `resources/*` and `prompts/*` methods we currently no-op. Notifications, recent activity, channel state could be modeled as MCP resources.
|
||||
5. **Configurable port.** Move `McpDefaultPort` into `SettingsManager`. Probably also pick a free port at startup if the default is in use, and surface the actual port in the Settings UI.
|
||||
6. **Setup Wizard step.** Today the Settings Advanced section is the only way to enable MCP. The Setup Wizard could offer it as a one-click option, especially attractive for users who don't run a gateway at all.
|
||||
|
||||
## File map
|
||||
|
||||
| File | Role |
|
||||
|---|---|
|
||||
| `src/OpenClaw.Shared/Mcp/McpToolBridge.cs` | Transport-agnostic JSON-RPC dispatcher. |
|
||||
| `src/OpenClaw.Shared/SettingsData.cs` | Settings JSON model. Adds `EnableMcpServer`; deprecates `McpOnlyMode`. |
|
||||
| `src/OpenClaw.Shared/Mcp/McpHttpServer.cs` | `HttpListener`-based loopback HTTP transport. |
|
||||
| `src/OpenClaw.Tray.WinUI/Services/NodeService.cs` | Owns the capability list. Hosts the MCP server when enabled. |
|
||||
| `src/OpenClaw.Tray.WinUI/Services/SettingsManager.cs` | In-memory settings model + load/save. Migrates legacy `McpOnlyMode`. |
|
||||
| `src/OpenClaw.Tray.WinUI/Pages/SettingsPage.xaml(.cs)` | Settings UI surface hosted by `HubWindow`. |
|
||||
| `src/OpenClaw.Tray.WinUI/App.xaml.cs` | Bootstraps `NodeService` based on the new mode matrix. |
|
||||
| `tests/OpenClaw.Shared.Tests/McpToolBridgeTests.cs` | 9 unit tests for the bridge. |
|
||||
|
||||
## Quick verification
|
||||
|
||||
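With the tray running and `EnableMcpServer = true` (every request requires the bearer token, so add `-H "Authorization: Bearer <token>"` to each command below):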
|
||||
|
||||
```powershell
|
||||
# Server is up
|
||||
curl http://127.0.0.1:8765/
|
||||
|
||||
# List tools
|
||||
curl -s -X POST http://127.0.0.1:8765/ `
|
||||
-H "Content-Type: application/json" `
|
||||
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
|
||||
|
||||
# Take a screenshot of the primary monitor
|
||||
curl -s -X POST http://127.0.0.1:8765/ `
|
||||
-H "Content-Type: application/json" `
|
||||
-d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"screen.snapshot"}}'
|
||||
```
|
||||
|
||||
For Claude Code, drop this into `.mcp.json` at the repo root or `~/.claude.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"openclaw-tray": {
|
||||
"type": "http",
|
||||
"url": "http://127.0.0.1:8765/"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
554
docs/MISSION_CONTROL.md
Normal file
554
docs/MISSION_CONTROL.md
Normal file
@ -0,0 +1,554 @@
|
||||
# Mission Control: Topology-Aware Command Center Plan
|
||||
|
||||
This plan turns the Windows tray from a "connected/not connected" companion into a Mission Control surface for any OpenClaw gateway topology. It is based on a deep audit of:
|
||||
|
||||
- Current Windows code in this repository on `mission-control-audit`.
|
||||
- Current upstream Mac app code in `openclaw/apps/macos/Sources/OpenClaw`.
|
||||
- Current upstream gateway, node policy, browser proxy, health, presence, usage, pairing, and discovery code in `openclaw/src` and `openclaw/extensions`.
|
||||
|
||||
The main product decision is deliberate: **do not make a native Windows gateway the center of gravity.** The Windows app should be a first-class node and command center for any OpenClaw gateway: Mac over SSH tunnel, WSL, Windows Node.js, LAN, Tailscale, or unknown/remote.
|
||||
|
||||
## 1. Goals
|
||||
|
||||
1. Make the Windows tray explain *what* it is connected to: local gateway, WSL gateway, Mac via SSH tunnel, Tailscale/LAN gateway, or unknown remote.
|
||||
2. Reach deeper Mac parity by porting the valuable Mac "mission control" ideas, not just matching command names.
|
||||
3. Keep OpenClaw open and topology-neutral: the tray should observe, classify, diagnose, and repair; it should not force one gateway hosting model.
|
||||
4. Prioritize privacy and safety. Diagnostics must not trigger camera, screen recording, microphone, or broad command execution.
|
||||
5. Make every repair action copyable, explainable, and topology-aware.
|
||||
|
||||
## 2. Audit findings
|
||||
|
||||
### 2.1 Windows current state
|
||||
|
||||
Windows now has a strong foundation:
|
||||
|
||||
- Node Mode with canvas, camera, screen snapshot/record, location, device info/status, system commands, notifications, and exec approval policy.
|
||||
- Command Center status detail window with channels, sessions, usage, local/operator node inventory, allowlist diagnostics, pairing warnings, and activity stream.
|
||||
- SSH tunnel settings and service.
|
||||
- Activity Stream and support-bundle copy path that avoid storing invoke payloads.
|
||||
- Deep links including `openclaw://commandcenter`.
|
||||
|
||||
The biggest missing model is not another gateway implementation. It is **topology state**. Current settings collapse all topologies into:
|
||||
|
||||
- `GatewayUrl`
|
||||
- `UseSshTunnel`
|
||||
- SSH host/user/ports
|
||||
- `EnableNodeMode`
|
||||
|
||||
There is no first-class concept of "Mac over SSH", "WSL", "Windows native", "Tailscale", "LAN", or "unknown".
|
||||
|
||||
### 2.2 Mac Mission Control behaviors worth porting
|
||||
|
||||
The Mac app is not just a menu bar icon. It is a gateway/node/control-plane cockpit.
|
||||
|
||||
Important Mac surfaces:
|
||||
|
||||
- Status icon with activity badge and gateway error dot.
|
||||
- Hover HUD with current status and last tool/activity.
|
||||
- WebChat, Canvas, Settings, Onboarding, Agent Events, Notify Overlay, Voice/Talk overlays.
|
||||
- Menu sections for sessions, usage, cost, nodes, gateway discovery, channel state, browser control, camera/canvas/voice toggles, exec approvals, debug actions, and update status.
|
||||
- Per-session submenus with preview, thinking/verbose settings, reset, compact, delete, and log opening.
|
||||
- Per-node submenus with copy actions for node ID, name, IP, platform, versions, caps, and commands.
|
||||
- Channel settings driven by gateway schemas and channel health/probe details.
|
||||
- Debug/diagnostic actions: health check, test heartbeat, open logs, open config, open session store, restart gateway, reset SSH tunnel, port diagnostics, kill process by PID, rolling JSONL diagnostics, and verbose logging.
|
||||
|
||||
Important Mac gateway lifecycle pieces:
|
||||
|
||||
- `GatewayProcessManager` state machine: stopped, starting, running, attachedExisting, failed.
|
||||
- Attach-existing path before spawning a gateway.
|
||||
- `GatewayEnvironment`: Node runtime, OpenClaw CLI location/version, port/bind resolution.
|
||||
- `PortGuardian`: identifies listeners on gateway ports, classifies expected vs unexpected processes, and can kill with confirmation.
|
||||
- `GatewayEndpointStore`: async-stream state for local/remote/unconfigured endpoint readiness.
|
||||
- Gateway discovery via Bonjour/SRV plus Tailscale selection rules.
|
||||
- Remote SSH tunnel actor with robust SSH options, fast-fail check, random local port fallback, and tunnel reuse across app restarts.
|
||||
- Control channel with friendly error mapping and recovery scheduling.
|
||||
- Presence reporter every 180 seconds with host/IP/mode/version/platform/device fields.
|
||||
|
||||
Important Mac security/privacy pieces:
|
||||
|
||||
- Permission matrix for notifications, automation, accessibility, screen recording, microphone, speech, camera, and location.
|
||||
- Onboarding security banner warning that agents can run commands, read/write files, and capture screenshots.
|
||||
- Exec approval UX with Deny / Allow Once / Allow Always.
|
||||
- Command display sanitizer for control chars, invisible characters, and non-ASCII spaces to prevent spoofing.
|
||||
- Glob allowlist matcher semantics.
|
||||
- Host environment sanitizer with large inherited secret/toolchain blocklist, PATH override rejection, and shell-wrapper allowlist.
|
||||
- Exec approval edits with base-hash optimistic concurrency: **implemented for `system.execApprovals.get/set`; stale remote writes are rejected.**
|
||||
- Pairing prompt with name, node ID, platform, app, IP, and approve/reject/later actions.
|
||||
|
||||
### 2.3 Gateway and browser proxy findings
|
||||
|
||||
`browser.proxy` is the main concrete remaining Mac node command gap.
|
||||
|
||||
Gateway/browser facts:
|
||||
|
||||
- `browser.proxy` is a canonical node command and included in Windows platform defaults at the gateway policy level.
|
||||
- Gateway policy still requires both gates:
|
||||
- command allowed by platform defaults or `gateway.nodes.allowCommands`
|
||||
- command declared by the node
|
||||
- The browser plugin/node-host contract is:
|
||||
- input: `method`, `path`, optional `query`, `body`, `timeoutMs`, `profile`
|
||||
- default timeout: 20 seconds
|
||||
- output: `{ result, files? }`
|
||||
- files are base64 payloads with path/mime metadata
|
||||
- Persistent profile mutations are blocked at gateway and node-host levels.
|
||||
- Mac implements `browser.proxy` only for local mode, proxying to `127.0.0.1:{gatewayPort+2}` with Bearer or `x-openclaw-password` auth, and a 10 MB/file extraction cap.
|
||||
- Windows managed SSH tunnel mode now forwards both the gateway port and the browser-control companion port (`local+2` to `remote+2`) when the browser proxy capability is enabled, so Mac-over-SSH topologies can satisfy the same local-only browser proxy contract.
|
||||
|
||||
Gateway APIs and signals worth surfacing:
|
||||
|
||||
- `hello-ok` snapshot/policy fields, including tick interval and limits.
|
||||
- `health`, `presence`, `tick`, `status`, `system-presence`, `sessions.*`, `usage.status`, `usage.cost`, `sessions.usage*`, `node.list`, `node.describe`, pairing APIs, and config/wizard APIs.
|
||||
- Snapshot fields such as presence, health, stateVersion, uptimeMs, auth/session defaults.
|
||||
- Non-loopback gateway security expectations: use `wss`, auth/trusted proxy, and explicit Control UI origins.
|
||||
- Discovery signals: mDNS/SRV, wide-area DNS-SD, Tailscale modes.
|
||||
|
||||
## 3. Topology model
|
||||
|
||||
### 3.1 Gateway kinds
|
||||
|
||||
Initial enum:
|
||||
|
||||
| Kind | Meaning | Detection signals |
|
||||
|---|---|---|
|
||||
| `MacOverSsh` | Localhost URL backed by an SSH tunnel to a Mac/remote host | `UseSshTunnel=true`, localhost gateway URL, SSH host present; future: presence platform macOS |
|
||||
| `Wsl` | Gateway likely running in WSL2 | localhost URL without tunnel, `wsl.exe` available, port/listener/process hints indicate WSL |
|
||||
| `WindowsNative` | Gateway likely running directly on Windows | localhost URL without tunnel and no WSL evidence |
|
||||
| `Tailscale` | Gateway reached via Tailscale DNS/IP | host ends with `.ts.net` or IP is in 100.64.0.0/10 |
|
||||
| `RemoteLan` | Gateway reached via LAN/mDNS/private host | RFC1918 IP, `.local`, or non-loopback private hostname |
|
||||
| `Remote` | Public/unknown non-local remote gateway | non-loopback public host |
|
||||
| `Unknown` | Cannot classify | invalid/missing URL or conflicting settings |
|
||||
|
||||
### 3.2 State objects
|
||||
|
||||
Additive shared models:
|
||||
|
||||
```csharp
|
||||
public enum GatewayKind
|
||||
{
|
||||
Unknown,
|
||||
WindowsNative,
|
||||
Wsl,
|
||||
MacOverSsh,
|
||||
Tailscale,
|
||||
RemoteLan,
|
||||
Remote
|
||||
}
|
||||
|
||||
public enum TunnelStatus
|
||||
{
|
||||
NotConfigured,
|
||||
Stopped,
|
||||
Starting,
|
||||
Up,
|
||||
Restarting,
|
||||
Failed
|
||||
}
|
||||
|
||||
public sealed class GatewayTopologyInfo
|
||||
{
|
||||
public GatewayKind DetectedKind { get; set; }
|
||||
public string DisplayName { get; set; }
|
||||
public string GatewayUrl { get; set; }
|
||||
public string Host { get; set; }
|
||||
public bool UsesSshTunnel { get; set; }
|
||||
public string Transport { get; set; }
|
||||
public string Detail { get; set; }
|
||||
}
|
||||
|
||||
public sealed class TunnelCommandCenterInfo
|
||||
{
|
||||
public TunnelStatus Status { get; set; }
|
||||
public string LocalEndpoint { get; set; }
|
||||
public string RemoteEndpoint { get; set; }
|
||||
public string? Host { get; set; }
|
||||
public string? User { get; set; }
|
||||
public string? LastError { get; set; }
|
||||
public DateTime? StartedAt { get; set; }
|
||||
}
|
||||
```
|
||||
|
||||
Extend `GatewayCommandCenterState` with:
|
||||
|
||||
```csharp
|
||||
public GatewayTopologyInfo Topology { get; set; } = new();
|
||||
public TunnelCommandCenterInfo? Tunnel { get; set; }
|
||||
```
|
||||
|
||||
### 3.3 Classifier rules
|
||||
|
||||
Phase 1 classifier should be pure and unit-testable:
|
||||
|
||||
1. If `UseSshTunnel` is true and SSH host is set:
|
||||
- if gateway URL host is localhost/127.0.0.1/::1, classify `MacOverSsh` for now.
|
||||
- if SSH host ends with `.ts.net`, include "over Tailscale SSH" in detail but keep tunnel as the primary transport.
|
||||
2. Else if gateway URL host is localhost/127.0.0.1/::1:
|
||||
- classify `WindowsNative` initially.
|
||||
- a later WSL probe can refine to `Wsl`.
|
||||
3. Else if host ends with `.ts.net` or IP is in 100.64.0.0/10:
|
||||
- classify `Tailscale`.
|
||||
4. Else if host is RFC1918, `.local`, or common private names:
|
||||
- classify `RemoteLan`.
|
||||
5. Else if host is non-empty:
|
||||
- classify `Remote`.
|
||||
6. Else:
|
||||
- classify `Unknown`.
|
||||
|
||||
Phase 2 WSL refinement:
|
||||
|
||||
- Probe `wsl.exe -l -q` with a short timeout.
|
||||
- Optional port/process detection should be cached and never block UI.
|
||||
- If localhost gateway is connected and WSL evidence is strong, classify `Wsl`.
|
||||
|
||||
## 4. Command Center UX target
|
||||
|
||||
### 4.1 Gateway/topology header card
|
||||
|
||||
Add a top card under the current status header:
|
||||
|
||||
- "Gateway: Windows native / Mac over SSH / WSL / Tailscale / LAN / Remote / Unknown"
|
||||
- URL host and transport: `ws`, `wss`, `ssh tunnel`, `tailnet`, `lan`
|
||||
- tunnel state if configured: `Up`, `Restarting`, `Failed`, `Stopped`
|
||||
- last health timestamp and gateway version/uptime once available from protocol
|
||||
|
||||
### 4.2 Diagnostics categories
|
||||
|
||||
Add categories beyond current node/channel/allowlist/parity:
|
||||
|
||||
| Category | Examples | Repair action |
|
||||
|---|---|---|
|
||||
| `topology` | Localhost URL but no local/tunnel evidence; remote plaintext `ws://`; unknown public host | Explain expected topology; copy URL/settings hints |
|
||||
| `tunnel` | SSH tunnel stopped/restarting/failed | Copy `ssh -N -L ...` command; "Reset tunnel" later |
|
||||
| `wsl` | Localhost likely backed by WSL; NAT or distro reboot may break it | Show WSL-specific diagnostic hints |
|
||||
| `tailscale` | Tailnet host but no tunnel/direct auth mismatch | Show Tailscale/wss/auth hints |
|
||||
| `browser` | `browser.proxy` disabled, policy-filtered, or missing a gateway+2 browser-control host | Explain Settings, allowlist, SSH forward, or local browser-host repair path |
|
||||
| `gateway` | stale health/stateVersion, auth error, not connected | Existing patterns plus topology-specific detail |
|
||||
|
||||
### 4.3 Tray menu badge
|
||||
|
||||
Add a small topology badge next to status:
|
||||
|
||||
- "Gateway: Connected - Mac over SSH"
|
||||
- "Gateway: Connected - Windows native"
|
||||
- "Gateway: Connected - Tailscale"
|
||||
|
||||
### 4.4 Settings hint
|
||||
|
||||
In Settings, show read-only detected topology near gateway URL/tunnel settings: **implemented with a live summary under the topology guide**
|
||||
|
||||
- detected kind
|
||||
- whether settings imply tunnel/direct
|
||||
- warning if URL/tunnel conflict
|
||||
|
||||
### 4.5 Future Mission Control pages
|
||||
|
||||
Keep `HubWindow` as the Command Center host, with pages/sections for:
|
||||
|
||||
1. Overview
|
||||
2. Gateway topology
|
||||
3. Tunnel/transport
|
||||
4. Channels
|
||||
5. Sessions
|
||||
6. Nodes/capabilities
|
||||
7. Command policy/allowlist
|
||||
8. Pairing/devices
|
||||
9. Activity/events
|
||||
10. Permissions/privacy
|
||||
11. Logs/debug/repair
|
||||
|
||||
## 5. Mac parity matrix
|
||||
|
||||
### 5.1 Node command surface
|
||||
|
||||
| Command area | Mac status | Windows status | Priority |
|
||||
|---|---|---|---|
|
||||
| Canvas core | Present | Mostly present | Verify defaults, payload names, A2UI bridge, snapshot shape |
|
||||
| Screen snapshot | Present | Present | Verify defaults: max width, format, quality, metadata |
|
||||
| Screen record | Present | Present | Verify clamps/audio fields; do not live-test without permission |
|
||||
| Camera list/snap/clip | Present | Present | Verify facing/deviceId/delay/default quality |
|
||||
| Location | Present | Present | Align error tokens and permission mode |
|
||||
| Device info/status | Present | Present | Done; keep payload shape tests |
|
||||
| System notify | Present | Present | Add overlay/priority parity later |
|
||||
| System run/which | Present | Present | Verify push event names and approval reasons |
|
||||
| Exec approvals get/set | Present | Present | Base-hash optimistic concurrency implemented |
|
||||
| Browser proxy | Present, local-only | Local bridge present; live smoke blocked until browser-control host listens on gateway+2 | Continue host setup/live-smoke guidance |
|
||||
|
||||
### 5.2 Mission Control surfaces
|
||||
|
||||
| Mac capability | Windows today | Plan |
|
||||
|---|---|---|
|
||||
| Gateway process state | Implemented for detected/managed runtimes | Command Center shows topology, gateway listener process/PID, and managed/detected SSH context; process manager remains only for a future owned local Windows gateway |
|
||||
| Endpoint store/discovery | Implemented first slice | Settings topology presets and detected topology summaries classify local, SSH, WSL, and remote gateway shapes |
|
||||
| SSH tunnel robust state | Implemented | Managed SSH tunnel status/error/runtime details surface in Settings, Command Center, support context, and restart actions |
|
||||
| PortGuardian | Partial | Read-only port diagnostics identify local listeners and owning process/PID; destructive kill actions remain intentionally absent |
|
||||
| HealthStore derived states | Implemented first slice | Command Center warnings include topology-aware gateway, tunnel, browser-control, channel, usage, and node health |
|
||||
| Nodes submenu copy actions | Implemented | Per-node copy and full node inventory copy include command groups, filtered commands, disabled settings, and parity gaps |
|
||||
| Session previews/settings | Implemented | Tray session rows include previews plus thinking/verbose, reset, compact, and delete actions |
|
||||
| Cost 30-day chart | Implemented | Command Center renders 30-day cost bars from `usage.cost` daily totals |
|
||||
| Agent events ring | Implemented | Activity Stream keeps a 400-event rich ring and support bundle window |
|
||||
| Permissions matrix | Implemented first slice | Command Center shows safe Windows privacy settings deep links without probing devices |
|
||||
| Onboarding security banner | Implemented | Setup Wizard warns about agent control of enabled local command/screen/camera/location/browser/canvas surfaces |
|
||||
| Debug actions | Implemented | Tray, Command Center, deep links, and PowerToys expose logs/config/diagnostics, health/update actions, managed SSH restart, support context, debug bundle, browser setup, and copyable diagnostics/summaries |
|
||||
| Voice/Talk | Missing | Separate roadmap track |
|
||||
| Cron/Skills settings | Missing/limited | Separate roadmap track |
|
||||
|
||||
## 6. Browser proxy feasibility
|
||||
|
||||
### 6.1 What it is
|
||||
|
||||
`browser.proxy` is not a generic HTTP proxy. It is a node command that forwards browser-plugin requests through a node-host endpoint and returns structured results and optional extracted files.
|
||||
|
||||
### 6.2 Windows options
|
||||
|
||||
1. **Local gateway/browser-host proxy parity**
|
||||
- Implement only when gateway is local or tunnel-local.
|
||||
- Proxy to `127.0.0.1:{gatewayPort+2}` like Mac.
|
||||
- Use Bearer/token or password header as gateway expects.
|
||||
- Enforce same method/path/query/body/timeout/profile contract.
|
||||
- Enforce same persistent-profile mutation block and file-size cap.
|
||||
- Best Mac parity, but depends on browser plugin host availability on Windows.
|
||||
|
||||
2. **Edge/WebView2 DevTools bridge**
|
||||
- Use WebView2/Edge DevTools protocol from the tray.
|
||||
- More Windows-native, but diverges from gateway browser extension contract.
|
||||
- Riskier and likely not the immediate parity path.
|
||||
|
||||
3. **Do not implement in tray; require browser extension node-host**
|
||||
- Keep tray focused on desktop node and command center.
|
||||
- Command Center explains why `browser.proxy` is absent and how to install/enable the browser plugin.
|
||||
- Lowest risk, but leaves a Mac command gap.
|
||||
|
||||
Recommended: investigate option 1 first, with `browser.proxy` gated to local/tunnel topologies and disabled for remote public gateways unless the upstream browser host contract says otherwise.
|
||||
|
||||
Current Windows implementation status: Windows node now advertises `browser.proxy` and forwards it to the local browser control host at `127.0.0.1:{gateway port + 2}`. It uses the gateway bearer token first and retries with the same shared secret as browser-host password/basic auth if bearer auth is rejected. Managed SSH tunnel mode also forwards the companion browser-control port (`local gateway port + 2` to `remote gateway port + 2`) when the browser proxy capability is enabled. Command Center still performs the read-only feasibility probe and warns when no compatible local browser host is listening, because the command depends on that local service being available.
|
||||
|
||||
## 7. Security and privacy requirements
|
||||
|
||||
1. Diagnostics must never take screenshots, record screen, capture camera, start microphone, or run arbitrary commands.
|
||||
2. Support bundles must not include base64 payloads, tokens, screenshots, recordings, camera data, or command arguments.
|
||||
3. Browser proxy must be local-only until we prove remote behavior is safe and intended.
|
||||
4. Exec approval UI must include command display sanitization before adding "Allow Once/Always" UX.
|
||||
5. Environment override parity should reject PATH and dangerous inherited/override keys.
|
||||
6. Pairing approvals must show identity, platform, app, IP, and repair status before approval.
|
||||
7. Allowlist repair should distinguish safe commands from privacy-sensitive commands. This is already in the Windows Command Center and should remain a product rule.
|
||||
|
||||
## 8. Implementation phases
|
||||
|
||||
### Phase 1: Topology model and gateway card
|
||||
|
||||
Files:
|
||||
|
||||
- `src/OpenClaw.Shared/Models.cs`
|
||||
- `src/OpenClaw.Shared/SettingsData.cs` if optional declared kind is persisted
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Services/SshTunnelService.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Windows/HubWindow.xaml`
|
||||
- `src/OpenClaw.Tray.WinUI/Windows/HubWindow.xaml.cs`
|
||||
- `tests/OpenClaw.Shared.Tests/ModelsTests.cs`
|
||||
- `tests/OpenClaw.Tray.Tests/SettingsRoundTripTests.cs` if settings change
|
||||
|
||||
Deliverables:
|
||||
|
||||
- `GatewayKind`, `TunnelStatus`, `GatewayTopologyInfo`, `TunnelCommandCenterInfo`.
|
||||
- Pure topology classifier.
|
||||
- Tunnel state/error/startedAt from `SshTunnelService`.
|
||||
- Gateway card in Command Center.
|
||||
- Topology/tunnel warnings.
|
||||
|
||||
Risk: low. No protocol changes.
|
||||
|
||||
### Phase 2: Better tunnel and WSL diagnostics
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Mac-equivalent SSH options: **implemented for tunnel startup**
|
||||
- `BatchMode=yes`
|
||||
- `ExitOnForwardFailure=yes`
|
||||
- `ServerAliveInterval=15`
|
||||
- `ServerAliveCountMax=3`
|
||||
- `TCPKeepAlive=yes`
|
||||
- Explicit tunnel states (`NotConfigured`, `Stopped`, `Starting`, `Up`, `Restarting`, `Failed`): **implemented**
|
||||
- Fast-fail detection.
|
||||
- Optional random local port fallback.
|
||||
- WSL detection helper with timeout/cache. Explicit `wsl.localhost` / `.wsl` host classification is implemented.
|
||||
- Tunnel reset action.
|
||||
|
||||
Risk: medium. Process lifecycle and port behavior need careful tests.
|
||||
|
||||
### Phase 3: Gateway self and presence model
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Parse `hello-ok` snapshot/version/policy fields: **implemented**
|
||||
- Parse/preserve presence events.
|
||||
- Show gateway version, uptime/stateVersion, auth source, presence count: **implemented in Command Center gateway card**
|
||||
- Add node/presence freshness warnings.
|
||||
|
||||
Risk: low-medium; mostly parsing and UI.
|
||||
|
||||
### Phase 4: Mac-like diagnostics and repair
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Debug/Mission Control actions:
|
||||
- open log: **implemented as Open Logs folder**
|
||||
- open config folder: **implemented**
|
||||
- open session store
|
||||
- run health now: **implemented as Refresh Health**
|
||||
- send test heartbeat
|
||||
- reset managed SSH tunnel: **implemented as Restart SSH Tunnel when Settings owns the tunnel**
|
||||
- restart local gateway if topology is WindowsNative and managed
|
||||
- copy privacy-safe support context: **implemented**
|
||||
- Rolling diagnostics JSONL with rotation: **implemented for privacy-safe app/connection/gateway/tunnel metadata**
|
||||
- Port diagnostics table: **read-only local listener visibility implemented, including owning PID/process name when Windows exposes it**
|
||||
- Manual SSH tunnel detection: **implemented Command Center classification for loopback gateway ports owned by `ssh`, so hand-started local forwards are not mislabeled as native Windows gateways**
|
||||
- Gateway runtime owner summary: **implemented in Command Center topology/support context so local gateway or SSH-forward listener process name, PID, and port are visible without managing the process**
|
||||
- Browser proxy SSH forward warning: **implemented targeted Command Center guidance when an SSH tunnel gateway is up but the companion `gateway port + 2` browser-control forward is missing**
|
||||
- Browser proxy invoke error guidance: **implemented `browser.proxy` unreachable/timeout errors that name `127.0.0.1:{gateway+2}` and show the exact SSH local-forward shape**
|
||||
- Settings SSH browser-forward guidance: **implemented Settings copy explaining that the managed SSH tunnel forwards `local-port+2` to `remote-port+2` for `browser.proxy` when the browser proxy bridge is enabled**
|
||||
- Settings SSH test tunnel parity: **implemented temporary Settings test tunnels with the same optional browser-control `local+2` forward that the runtime uses when the Browser proxy bridge is enabled**
|
||||
- Settings SSH tunnel preview: **implemented selectable Settings preview of the exact managed `ssh -N -L ...` command, including the optional browser-control companion forward**
|
||||
- Browser proxy disabled guidance: **implemented a specific Command Center warning/copy hint when `browser.proxy` is intentionally disabled in Settings**
|
||||
- Asymmetric SSH browser guidance: **fixed Command Center and `browser.proxy` invoke guidance so local `gateway+2` and remote `gateway+2` can differ**
|
||||
- SSH local browser-port source: **fixed Command Center browser diagnostics to derive the local browser-control port from the active tunnel local endpoint instead of stale saved gateway URLs**
|
||||
- Browser-control host runtime smoke: **verified the upstream browser-control host can listen locally on `127.0.0.1:{gateway+2}`, return HTTP 200 from `/` and `/tabs`, and appear in Command Center port diagnostics with owning PID/process**
|
||||
- Browser proxy auth guidance: **implemented warnings for QR/bootstrap-paired Windows nodes that advertise `browser.proxy` without a saved gateway shared token, and clarified invoke errors for missing versus mismatched browser-control auth**
|
||||
|
||||
Risk: medium-high for kill/restart actions; start as read-only/copy actions.
|
||||
|
||||
### Phase 5: Node command byte-for-byte parity audit fixes
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Verify and align canvas/screen/camera/location/system payload defaults and error tokens.
|
||||
- Verify push event names for exec.
|
||||
- Add missing base-hash concurrency semantics if needed: **implemented for remote exec approval policy edits**
|
||||
- Add `browser.proxy` feasibility prototype or explicit "not implemented" install guidance: **local browser-control bridge implemented; host runtime and Command Center listener detection smoke-tested; remaining end-to-end invoke blocker is matching operator/gateway auth for the active gateway**
|
||||
|
||||
Risk: varies; `browser.proxy` is medium-high.
|
||||
|
||||
### Phase 6: Security/privacy UX parity
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Windows permission matrix with deep links:
|
||||
- camera
|
||||
- microphone
|
||||
- location
|
||||
- notifications
|
||||
- broad file system access if relevant
|
||||
- screen capture/graphics capture guidance
|
||||
- First read-only Command Center slice is implemented. It surfaces these settings pages and explanatory rows, but intentionally does not query, request, or exercise device permissions.
|
||||
- Capability diagnostics copy is implemented for declared commands, gateway allowlist status, and privacy-sensitive opt-ins.
|
||||
- Mac-style onboarding security warning: **implemented in Setup Wizard Node Mode step, warning users that approved agents can run local commands and access enabled screen/camera/location/browser/canvas surfaces**
|
||||
- Topology choice onboarding: **first Settings guide implemented with local, WSL, SSH tunnel, and remote/Tailscale presets**
|
||||
- Exec approval dialog with sanitizer and three-button flow: **implemented for local `Prompt` policy decisions with Allow once / Always allow / Deny**
|
||||
- Exec approval remote-policy hardening: **implemented guardrails so `system.execApprovals.set` cannot remotely switch to default allow, install broad/dangerous allow rules, or overwrite a newer local policy without a matching `baseHash`**
|
||||
- Host env sanitizer parity hardening: **implemented expanded blocking for secret-looking overrides such as tokens, passwords, API keys, access keys, private keys, client secrets, and connection strings**
|
||||
- Dangerous command opt-in guidance: **implemented copyable safety guidance for camera/screen privacy-sensitive commands without emitting one-click dangerous repair commands**
|
||||
- Node capability settings: **implemented Settings toggles for canvas, screen, camera, location, and browser proxy command groups so privacy-sensitive surfaces can be disabled before reconnecting/re-pairing**
|
||||
- Disabled capability diagnostics: **implemented Command Center distinction between intentionally disabled Settings groups and true gateway allowlist/parity gaps**
|
||||
- Browser proxy policy diagnostics: **implemented a specific Command Center warning/copy action for declared `browser.proxy` commands filtered by gateway policy, instead of burying them under generic blocked-command output**
|
||||
|
||||
Risk: high for exec/security. Do not rush.
|
||||
|
||||
### Phase 7: Mission Control depth
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Session previews with thinking/verbose controls.
|
||||
- Cost 30-day bars: **implemented in Command Center from `usage.cost` daily totals**
|
||||
- Node copy submenus / summaries: **implemented first Command Center copy action**
|
||||
- Channel health summary and copyable context: **implemented first summary plus Command Center start/stop actions**
|
||||
- Channel schema forms and QR login flows: **implemented first Windows surface with channel setup/dashboard deep links and copyable channel context**
|
||||
- Skills/Cron settings: **implemented first Windows surface with Command Center dashboard entrypoints and copyable guidance**
|
||||
- Agent events ring expansion: **implemented first Command Center recent-activity panel with copy/open-stream actions**
|
||||
- Hover HUD / richer tray tooltip: **implemented with topology, channel, node, warning, and activity summary**
|
||||
- Update status: **implemented in Command Center support/debug section and copied support context, including current version, latest prompted version when known, and last check outcome**
|
||||
|
||||
Risk: medium; mostly UI and gateway method plumbing.
|
||||
|
||||
### Phase 8: Optional local Windows gateway convenience
|
||||
|
||||
This is optional and should not block Mission Control.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Detect existing local Windows gateway.
|
||||
- Attach to it and show logs/version/port.
|
||||
- Only if user opts in: start/stop/restart a managed local gateway.
|
||||
|
||||
Risk: high. Requires Node/runtime/version/process ownership. Keep separate from topology-aware Command Center.
|
||||
|
||||
## 9. Test strategy
|
||||
|
||||
### Unit tests
|
||||
|
||||
- Topology classifier matrix:
|
||||
- localhost/no tunnel -> WindowsNative
|
||||
- localhost/tunnel -> MacOverSsh
|
||||
- `.ts.net` -> Tailscale
|
||||
- 100.64.0.0/10 -> Tailscale
|
||||
- 192.168.x.x / 10.x.x.x / 172.16.x.x–172.31.x.x (private LAN ranges) -> RemoteLan
|
||||
- `.local` -> RemoteLan
|
||||
- public host -> Remote
|
||||
- invalid/missing -> Unknown
|
||||
- Tunnel info state mapping.
|
||||
- Diagnostic sorting/dedupe with topology/tunnel warnings.
|
||||
- Settings round-trip if new persisted fields are added.
|
||||
- Existing capability and command-center tests stay green.
|
||||
|
||||
### Safe live tests
|
||||
|
||||
No screen recording, camera capture, or microphone.
|
||||
|
||||
1. Mac gateway over SSH tunnel:
|
||||
- Enable tunnel.
|
||||
- Expect Command Center topology: Mac over SSH.
|
||||
- Expect tunnel state: Up.
|
||||
- Health/channel events continue.
|
||||
2. Localhost without tunnel:
|
||||
- Expect Windows native until WSL detection exists.
|
||||
- If no gateway, show clear connection warning.
|
||||
3. Tailscale URL:
|
||||
- Use a synthetic settings profile or non-invasive connection check.
|
||||
- Expect topology classification only.
|
||||
4. Remote LAN URL:
|
||||
- Expect Remote LAN classification.
|
||||
5. Tunnel failure:
|
||||
- Stop only the known SSH process if started by the app.
|
||||
- Expect tunnel warning/restart state.
|
||||
6. Allowlist regression:
|
||||
- Safe repair remains copyable.
|
||||
- Dangerous camera/screen commands remain informational.
|
||||
|
||||
### Required validation
|
||||
|
||||
After code changes:
|
||||
|
||||
```powershell
|
||||
.\build.ps1
|
||||
dotnet test .\tests\OpenClaw.Shared.Tests\OpenClaw.Shared.Tests.csproj --no-restore
|
||||
dotnet test .\tests\OpenClaw.Tray.Tests\OpenClaw.Tray.Tests.csproj --no-restore
|
||||
```
|
||||
|
||||
## 10. Open questions
|
||||
|
||||
1. Should `DeclaredGatewayKind` be a persisted user hint, or should detection remain purely derived?
|
||||
2. Should Mac-over-SSH be named `SshTunnel` until presence confirms a Mac platform?
|
||||
3. Should `browser.proxy` live in the tray, or should Command Center guide users to install/enable the browser plugin host?
|
||||
4. Do we want a future "managed local gateway" mode, or only "detected local gateway"?
|
||||
5. How much Tailscale integration should Windows own vs merely detect?
|
||||
6. Should WSL detection use process/port probing, `wsl.exe`, or gateway presence fields once available?
|
||||
7. Should support bundles include topology/tunnel diagnostics by default, and how should they redact host/user/IP? **Implemented for Command Center copy support context with redacted gateway URL, topology detail, tunnel endpoints/errors, and port details.**
|
||||
|
||||
## 11. Immediate recommendation
|
||||
|
||||
Implement Phase 1 now:
|
||||
|
||||
- Add topology/tunnel models and classifier.
|
||||
- Surface them in Command Center.
|
||||
- Add topology/tunnel warnings.
|
||||
- Keep everything read-only and diagnostic.
|
||||
|
||||
This is the cleanest bridge between today's working Command Center and the Mac-style Mission Control product vision. It does not require a native Windows gateway, protocol changes, or privacy-sensitive live tests.
|
||||
|
||||
105
docs/ONBOARDING_WIZARD.md
Normal file
105
docs/ONBOARDING_WIZARD.md
Normal file
@ -0,0 +1,105 @@
|
||||
# Onboarding Wizard
|
||||
|
||||
The onboarding wizard is a guided 6-screen setup experience for new Windows users, matching the macOS onboarding flow.
|
||||
|
||||
## Overview
|
||||
|
||||
On first launch (or when no gateway token is configured), the wizard walks users through:
|
||||
|
||||
1. **Welcome** — Greeting and introduction
|
||||
2. **Connection** — Gateway selection and authentication
|
||||
3. **Wizard** — Gateway-driven configuration (AI provider, personality, channels)
|
||||
4. **Permissions** — Windows system permission review
|
||||
5. **Chat** — First conversation with the agent
|
||||
6. **Ready** — Feature summary and completion
|
||||
|
||||
The wizard adapts based on the connection mode:
|
||||
- **Local gateway**: All 6 screens (including Wizard for gateway configuration)
|
||||
- **Remote gateway**: Skips Wizard (assumes gateway is pre-configured)
|
||||
- **Configure Later**: Minimal flow — Welcome → Connection → Ready
|
||||
|
||||
## Screen Details
|
||||
|
||||
### Welcome
|
||||
Displays the OpenClaw lobster icon, app title, and a brief description. Single "Get Started" button advances to Connection.
|
||||
|
||||
### Connection
|
||||
Three connection modes via radio buttons:
|
||||
- **Local** — Pre-fills `ws://localhost:18789` for a gateway running on the same machine or in WSL
|
||||
- **Remote** — Enter a gateway URL and bootstrap token, or paste a base64url-encoded setup code
|
||||
- **Later** — Skip connection for now; configure from the tray menu after setup
|
||||
|
||||
Connection testing performs a real WebSocket handshake with Ed25519 device authentication. Status feedback shows connecting, connected, pairing required, token mismatch, or timeout.
|
||||
|
||||
When pairing approval is required, the wizard displays the gateway CLI approval command, copies it to the clipboard, and shows a notification with a copy action. Approval still happens through the gateway's normal `openclaw devices approve <device-id>` flow; the Windows tray does not edit gateway pairing state directly.
|
||||
|
||||
### Wizard
|
||||
Renders server-defined setup steps via RPC (`wizard.start` / `wizard.next`). The gateway controls the flow — steps can be:
|
||||
- **Note** — informational messages
|
||||
- **Confirm** — yes/no decisions
|
||||
- **Text** — free-form input (with PasswordBox for sensitive fields like API keys)
|
||||
- **Select** — radio button choices (e.g., AI provider selection)
|
||||
- **Progress** — loading indicator for background operations
|
||||
|
||||
If the gateway doesn't support the wizard protocol or is unreachable, this screen shows an "offline" message and can be skipped.
|
||||
|
||||
### Permissions
|
||||
Checks 5 Windows permissions using native APIs and registry:
|
||||
- Notifications (Toast capability)
|
||||
- Camera (Windows.Devices.Enumeration)
|
||||
- Microphone (Windows.Devices.Enumeration)
|
||||
- Screen Capture (Graphics.Capture)
|
||||
- Location (optional, registry-based)
|
||||
|
||||
Each permission shows its current status (Enabled/Disabled/Allowed/Denied) with an "Open Settings" button linking to the relevant `ms-settings:` URI.
|
||||
|
||||
### Chat
|
||||
Embeds the gateway's web chat UI via WebView2, matching the post-setup `ChatWindow` for visual consistency. Uses the shared `GatewayChatHelper` for URL building and WebView2 initialization.
|
||||
|
||||
On first load, a bootstrap message is auto-injected to kick off the gateway's first-run ritual (BOOTSTRAP.md). The message is safely encoded using `JsonSerializer.Serialize` to prevent XSS.
|
||||
|
||||
### Ready
|
||||
Displays 5 feature cards (Tray Menu, Channels, Voice, Canvas, Skills) with localized subtitles. Includes a "Launch at Login" toggle and a "Finish" button that saves settings and closes the wizard.
|
||||
|
||||
## Security
|
||||
|
||||
The onboarding wizard follows these security practices:
|
||||
|
||||
- **XSS prevention**: Bootstrap messages encoded via `JsonSerializer.Serialize` for safe JS injection
|
||||
- **Input validation**: Setup codes limited to 2KB, decoded JSON validated, gateway URLs checked via `GatewayUrlHelper`
|
||||
- **URI scheme allowlists**: Only `ms-settings:` for permissions, `http`/`https` for chat
|
||||
- **Navigation restriction**: WebView2 `NavigationStarting` handler blocks navigation to external origins
|
||||
- **Token protection**: Query params stripped from all log output; WebView2 accelerator keys disabled
|
||||
- **Gateway-owned pairing**: Device approval uses the gateway CLI/API path so scope checks, token issuance, audit, and broadcasts stay centralized
|
||||
- **Error sanitization**: Exception details logged but not shown to users
|
||||
|
||||
## Localization
|
||||
|
||||
All user-visible strings use `LocalizationHelper.GetString()` with the `Onboarding_*` key namespace. Supported languages are discovered from the `Strings/<locale>/Resources.resw` directories; the current locales are English, French, Dutch, Chinese Simplified, and Chinese Traditional.
|
||||
|
||||
Translations are AI-generated following the repo convention. Technical terms (Gateway, Token, Node Mode) are kept in English across all locales.
|
||||
|
||||
## Developer Guide
|
||||
|
||||
See [DEVELOPMENT.md](../DEVELOPMENT.md#developing--testing-the-onboarding-wizard) for build instructions, environment variables, and testing workflow.
|
||||
|
||||
### Test Isolation
|
||||
|
||||
`SettingsManager` loads `%APPDATA%\OpenClawTray\settings.json` by default. Onboarding tests must not use `new SettingsManager()` without an isolated settings directory, because local user settings such as `EnableNodeMode=true` change page ordering by intentionally skipping operator-only Wizard and Chat pages.
|
||||
|
||||
Use a temp settings directory for tests that construct `SettingsManager`, or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts.
|
||||
|
||||
### Key Files
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `Onboarding/OnboardingWindow.cs` | Host window with WebView2 overlay |
|
||||
| `Onboarding/OnboardingApp.cs` | Functional UI root component, page navigation |
|
||||
| `Onboarding/Services/OnboardingState.cs` | Shared state across all pages |
|
||||
| `Onboarding/Pages/*.cs` | Individual wizard screens |
|
||||
| `Onboarding/Services/SetupCodeDecoder.cs` | Base64url setup code parsing |
|
||||
| `Onboarding/Services/InputValidator.cs` | Security input validation |
|
||||
| `Onboarding/Services/WizardStepParser.cs` | Wizard JSON step parsing |
|
||||
| `Onboarding/Services/LocalGatewayApprover.cs` | Local gateway URL classification |
|
||||
| `Onboarding/Services/PermissionChecker.cs` | Windows permission checks |
|
||||
| `Helpers/GatewayChatHelper.cs` | Shared WebView2 chat URL builder |
|
||||
140
docs/POWERTOYS.md
Normal file
140
docs/POWERTOYS.md
Normal file
@ -0,0 +1,140 @@
|
||||
# PowerToys Command Palette — OpenClaw Extension
|
||||
|
||||
The OpenClaw Command Palette extension integrates with [PowerToys Command Palette](https://learn.microsoft.com/windows/powertoys/command-palette) to give you fast keyboard-driven access to OpenClaw from anywhere on your desktop.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [PowerToys](https://github.com/microsoft/PowerToys) installed (v0.90 or later recommended — this is the version that shipped Command Palette).
|
||||
- OpenClaw Tray (Molty) installed and configured.
|
||||
|
||||
## Installation
|
||||
|
||||
### Via the OpenClaw Installer (recommended)
|
||||
|
||||
When running the OpenClaw Tray installer, tick the **"Install PowerToys Command Palette extension"** checkbox. The installer will register the extension automatically.
|
||||
|
||||
### Manual Registration
|
||||
|
||||
If you installed without the Command Palette option, or need to re-register after a repair:
|
||||
|
||||
1. Open **PowerShell** (no admin needed).
|
||||
2. Run:
|
||||
|
||||
```powershell
|
||||
Add-AppxPackage -Register "$env:LOCALAPPDATA\OpenClawTray\CommandPalette\AppxManifest.xml" -ForceApplicationShutdown
|
||||
```
|
||||
|
||||
3. Restart PowerToys if it was running.
|
||||
|
||||
### Verifying Registration
|
||||
|
||||
Open Command Palette (`Win+Alt+Space`), type **"OpenClaw"** — you should see the OpenClaw commands appear.
|
||||
|
||||
## Available Commands
|
||||
|
||||
| Command | Action |
|
||||
|---------|--------|
|
||||
| **🦞 Open Dashboard** | Opens the OpenClaw web dashboard in your default browser |
|
||||
| **💬 Dashboard: Sessions** | Opens the sessions dashboard |
|
||||
| **📡 Dashboard: Channels** | Opens the channel configuration dashboard |
|
||||
| **🧩 Dashboard: Skills** | Opens the skills dashboard |
|
||||
| **⏱️ Dashboard: Cron** | Opens the scheduled jobs dashboard |
|
||||
| **💬 Web Chat** | Opens the embedded Chat page in OpenClaw Tray |
|
||||
| **📝 Quick Send** | Opens the Quick Send dialog to compose a message |
|
||||
| **🧭 Setup Wizard** | Opens QR, setup code, and manual gateway pairing |
|
||||
| **🧭 Command Center** | Opens gateway, tunnel, node, browser, and support diagnostics |
|
||||
| **🔄 Run Health Check** | Refreshes gateway or node connection health |
|
||||
| **⬇️ Check for Updates** | Runs a manual GitHub Releases update check |
|
||||
| **⚡ Activity Stream** | Opens recent tray activity and support bundle actions |
|
||||
| **📋 Notification History** | Opens recent OpenClaw tray notifications in the Activity page |
|
||||
| **⚙️ Settings** | Opens the OpenClaw Tray Settings page |
|
||||
| **📄 Open Log File** | Opens the current OpenClaw Tray log |
|
||||
| **📁 Open Logs Folder** | Opens the OpenClaw Tray logs folder |
|
||||
| **🗂️ Open Config Folder** | Opens the OpenClaw Tray configuration folder |
|
||||
| **🧪 Open Diagnostics Folder** | Opens the diagnostics JSONL folder |
|
||||
| **📋 Copy Support Context** | Copies redacted Command Center support metadata |
|
||||
| **🧰 Copy Debug Bundle** | Copies combined support, port, capability, node, channel, and activity diagnostics |
|
||||
| **🌐 Copy Browser Setup** | Copies browser.proxy and node-host setup guidance |
|
||||
| **🔌 Copy Port Diagnostics** | Copies gateway/browser/tunnel port owners and stop hints |
|
||||
| **🛡️ Copy Capability Diagnostics** | Copies permission, allowlist, and parity diagnostics |
|
||||
| **🖥️ Copy Node Inventory** | Copies node capabilities, commands, and policy status |
|
||||
| **📡 Copy Channel Summary** | Copies channel health and start/stop availability |
|
||||
| **⚡ Copy Activity Summary** | Copies recent tray activity |
|
||||
| **🧩 Copy Extensibility Summary** | Copies channel, skills, and cron surface guidance |
|
||||
| **🔁 Restart SSH Tunnel** | Restarts the tray-managed SSH tunnel when enabled |
|
||||
|
||||
## Usage
|
||||
|
||||
1. Press `Win+Alt+Space` to open Command Palette.
|
||||
2. Type `OpenClaw` (or just `oc`) to filter to OpenClaw commands.
|
||||
3. Select the action with arrow keys and press `Enter`.
|
||||
|
||||
Commands are also surfaced as deep links — you can invoke them from a browser or script using `openclaw://` URIs (see [SETUP.md](./SETUP.md#deep-links)).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### OpenClaw commands don't appear in Command Palette
|
||||
|
||||
1. Make sure PowerToys Command Palette is enabled: **PowerToys Settings → Command Palette → Enable Command Palette**.
|
||||
2. Try re-registering the extension (see [Manual Registration](#manual-registration) above).
|
||||
3. Restart PowerToys after registration.
|
||||
4. Check that the extension files exist at `%LOCALAPPDATA%\OpenClawTray\CommandPalette\`.
|
||||
|
||||
### Commands appear but do nothing
|
||||
|
||||
The extension communicates with OpenClaw Tray via `openclaw://` deep links. Make sure:
|
||||
- OpenClaw Tray (`OpenClaw.Tray.WinUI.exe`) is running.
|
||||
- The `openclaw://` URI scheme is registered. If not, re-run the OpenClaw Tray installer.
|
||||
|
||||
### Extension was removed after a PowerToys update
|
||||
|
||||
PowerToys updates can sometimes unregister third-party extensions. Re-register with:
|
||||
|
||||
```powershell
|
||||
Add-AppxPackage -Register "$env:LOCALAPPDATA\OpenClawTray\CommandPalette\AppxManifest.xml" -ForceApplicationShutdown
|
||||
```
|
||||
|
||||
### Unregistering the extension
|
||||
|
||||
To remove the OpenClaw extension from Command Palette without uninstalling Tray:
|
||||
|
||||
```powershell
|
||||
Get-AppxPackage -Name '*OpenClaw*' | Remove-AppxPackage
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- The extension is a **sparse MSIX package** registered per-user, so no administrator rights are required.
|
||||
- It is built against the `Microsoft.CommandPalette.Extensions` SDK and communicates with Tray exclusively via `openclaw://` deep links — there is no direct IPC between the extension and Tray.
|
||||
- Command Palette extension commands and their deep link targets:
|
||||
|
||||
| Command | Deep link |
|
||||
|---------|-----------|
|
||||
| Open Dashboard | `openclaw://dashboard` |
|
||||
| Dashboard: Sessions | `openclaw://dashboard/sessions` |
|
||||
| Dashboard: Channels | `openclaw://dashboard/channels` |
|
||||
| Dashboard: Skills | `openclaw://dashboard/skills` |
|
||||
| Dashboard: Cron | `openclaw://dashboard/cron` |
|
||||
| Web Chat | `openclaw://chat` |
|
||||
| Quick Send | `openclaw://send` |
|
||||
| Setup Wizard | `openclaw://setup` |
|
||||
| Command Center | `openclaw://commandcenter` |
|
||||
| Run Health Check | `openclaw://healthcheck` |
|
||||
| Check for Updates | `openclaw://check-updates` |
|
||||
| Activity Stream | `openclaw://activity` |
|
||||
| Notification History | `openclaw://history` |
|
||||
| Settings | `openclaw://settings` |
|
||||
| Open Log File | `openclaw://logs` |
|
||||
| Open Logs Folder | `openclaw://log-folder` |
|
||||
| Open Config Folder | `openclaw://config` |
|
||||
| Open Diagnostics Folder | `openclaw://diagnostics` |
|
||||
| Copy Support Context | `openclaw://support-context` |
|
||||
| Copy Debug Bundle | `openclaw://debug-bundle` |
|
||||
| Copy Browser Setup | `openclaw://browser-setup` |
|
||||
| Copy Port Diagnostics | `openclaw://port-diagnostics` |
|
||||
| Copy Capability Diagnostics | `openclaw://capability-diagnostics` |
|
||||
| Copy Node Inventory | `openclaw://node-inventory` |
|
||||
| Copy Channel Summary | `openclaw://channel-summary` |
|
||||
| Copy Activity Summary | `openclaw://activity-summary` |
|
||||
| Copy Extensibility Summary | `openclaw://extensibility-summary` |
|
||||
| Restart SSH Tunnel | `openclaw://restart-ssh-tunnel` |
|
||||
52
docs/RELEASING.md
Normal file
52
docs/RELEASING.md
Normal file
@ -0,0 +1,52 @@
|
||||
# Releasing OpenClaw Windows Hub
|
||||
|
||||
This repo uses **GitVersion + CI** for release versioning.
|
||||
The canonical release flow is **tag-driven**, not manual file patching.
|
||||
|
||||
## TL;DR
|
||||
|
||||
1. Merge approved changes into `master`.
|
||||
2. Create and push a semantic tag:
|
||||
```powershell
|
||||
git checkout master
|
||||
git pull --ff-only origin master
|
||||
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
||||
git push origin master
|
||||
git push origin vX.Y.Z
|
||||
```
|
||||
3. CI (`.github/workflows/ci.yml`) builds/signs/publishes artifacts and creates the GitHub release from that tag.
|
||||
|
||||
## Why this is the correct flow
|
||||
|
||||
- `GitVersion.yml` is configured for `ContinuousDelivery` with `tag-prefix: 'v'`.
|
||||
- CI computes version from git history/tags and passes it to builds (`-p:Version=...`).
|
||||
- CI patches MSIX manifest version during build, so releases are consistent across EXE/MSIX assets.
|
||||
|
||||
## Important rules
|
||||
|
||||
- **Do not manually bump** version files for routine releases:
|
||||
- `src/OpenClaw.Tray/OpenClaw.Tray.csproj`
|
||||
- `src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj`
|
||||
- `src/OpenClaw.Tray.WinUI/Package.appxmanifest`
|
||||
- Treat csproj `<Version>` as a **local fallback** for dev builds.
|
||||
- Release versions should come from the **tag** (`vX.Y.Z`).
|
||||
|
||||
## Verify release pipeline
|
||||
|
||||
After pushing a tag, confirm in GitHub Actions:
|
||||
- workflow: **Build and Test**
|
||||
- trigger ref: `refs/tags/vX.Y.Z`
|
||||
- jobs complete successfully (build, build-msix, release)
|
||||
- release assets are attached to the tag release
|
||||
|
||||
## If you need to retag
|
||||
|
||||
If a tag points to the wrong commit:
|
||||
|
||||
```powershell
|
||||
git tag -d vX.Y.Z
|
||||
git push origin :refs/tags/vX.Y.Z
|
||||
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
||||
git push origin vX.Y.Z
|
||||
```
|
||||
|
||||
190
docs/SETUP.md
Normal file
190
docs/SETUP.md
Normal file
@ -0,0 +1,190 @@
|
||||
# OpenClaw Tray — Installation & Setup Guide
|
||||
|
||||
This guide covers installing OpenClaw Tray (Molty) on Windows using the pre-built installer. For building from source, see [DEVELOPMENT.md](../DEVELOPMENT.md).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before installing, make sure you have:
|
||||
|
||||
- **Windows 10 (20H2 or later)** or **Windows 11**
|
||||
- **WebView2 Runtime** — pre-installed on Windows 11 and most up-to-date Windows 10 systems. If missing, download from [Microsoft Edge WebView2](https://developer.microsoft.com/microsoft-edge/webview2/).
|
||||
- An active **OpenClaw account** with a gateway token — sign up at [openclaw.ai](https://openclaw.ai).
|
||||
|
||||
## Step-by-Step Installation
|
||||
|
||||
### 1. Download the Installer
|
||||
|
||||
Go to the [Releases page](https://github.com/openclaw/openclaw-windows-node/releases) and download the latest installer for your architecture:
|
||||
|
||||
| File | Architecture |
|
||||
|------|-------------|
|
||||
| `OpenClawTray-Setup-x64.exe` | Intel / AMD (most PCs) |
|
||||
| `OpenClawTray-Setup-arm64.exe` | ARM64 (Surface Pro X, Snapdragon laptops) |
|
||||
|
||||
If you're unsure, use the **x64** installer.
|
||||
|
||||
### 2. Run the Installer
|
||||
|
||||
Double-click the downloaded `.exe`. Windows may show a SmartScreen prompt — click **More info → Run anyway** (this is normal for code-signed apps that haven't yet accumulated reputation).
|
||||
|
||||
The installer runs without requiring administrator privileges.
|
||||
|
||||
### 3. Choose Optional Components
|
||||
|
||||
The installer offers three optional components:
|
||||
|
||||
- **Create Desktop Icon** — adds a shortcut to your desktop.
|
||||
- **Start OpenClaw Tray when Windows starts** — launches Molty automatically at login (recommended).
|
||||
- **Install PowerToys Command Palette extension** — enables OpenClaw commands in PowerToys Command Palette (requires [PowerToys](https://github.com/microsoft/PowerToys) to be installed). See [POWERTOYS.md](./POWERTOYS.md) for details.
|
||||
|
||||
### 4. First Launch
|
||||
|
||||
After the installer finishes, OpenClaw Tray starts automatically. Look for the 🦞 lobster icon in the system tray (bottom-right corner of the taskbar, near the clock).
|
||||
|
||||
If you don't see it, check the **hidden icons** area (the `^` arrow next to the tray).
|
||||
|
||||
### 5. Onboarding Wizard
|
||||
|
||||
On first launch, Molty opens a **6-screen onboarding wizard** that walks you through setup:
|
||||
|
||||
1. **Welcome** — A friendly greeting introducing OpenClaw and Molty. Click **Get Started** to begin.
|
||||
|
||||
2. **Connection** — Choose how to connect to your gateway:
|
||||
- **Local** — Select this if the gateway runs on the same machine or in WSL. The URL is pre-filled to `ws://localhost:18789`.
|
||||
- **Remote** — Enter your gateway URL and bootstrap token manually, **or** paste a base64url-encoded **setup code** (a single string containing both URL and token).
|
||||
- **Later** — Skip connection setup for now. You can configure it later from the tray menu → Settings.
|
||||
|
||||
After entering your details, click **Test Connection**. The wizard performs a real WebSocket handshake with Ed25519 device authentication and shows real-time status feedback (connecting → connected → pairing).
|
||||
|
||||
3. **Wizard** — If your gateway supports it, this screen walks you through gateway-driven configuration steps (AI provider selection, personality setup, communication channels). The steps are defined by your gateway via RPC. If the gateway doesn't support wizard mode, this screen is skipped automatically.
|
||||
|
||||
4. **Permissions** — Reviews Windows system permissions needed for full functionality:
|
||||
- **Notifications** — for toast alerts
|
||||
- **Camera** — for camera capture
|
||||
- **Microphone** — for voice input
|
||||
- **Screen Capture** — for screenshots
|
||||
- **Location** — optional, for location-aware features; packaged installs declare this capability so Windows may prompt for location consent the first time it is used
|
||||
|
||||
Each permission shows its current status. Click **Open Settings** next to any permission to jump directly to the relevant Windows Settings page.
|
||||
|
||||
5. **Chat** — Meet your agent! This screen opens a live chat powered by the gateway's web UI. A bootstrap message is sent automatically to kick off your first conversation.
|
||||
|
||||
6. **Ready** — A summary of available features (tray menu, channels, voice, canvas, skills). Toggle **Launch at Login** to start Molty with Windows, then click **Finish** to complete setup.
|
||||
|
||||
After the wizard, the tray icon turns green when connected. You can re-run the wizard or change settings anytime from the tray menu.
|
||||
|
||||
## Tray Icon Status
|
||||
|
||||
| Icon colour | Meaning |
|
||||
|-------------|---------|
|
||||
| 🟢 Green | Connected to gateway |
|
||||
| 🟡 Amber | Connecting / reconnecting |
|
||||
| 🔴 Red | Error |
|
||||
| ⚫ Grey | Disconnected |
|
||||
|
||||
Left-click the icon to open the quick-access menu. Right-click for context options.
|
||||
|
||||
## Deep Links
|
||||
|
||||
OpenClaw Tray responds to `openclaw://` deep links, which can be invoked from a browser or another app:
|
||||
|
||||
| Link | Action |
|
||||
|------|--------|
|
||||
| `openclaw://dashboard` | Open the OpenClaw web dashboard |
|
||||
| `openclaw://dashboard/sessions` | Open the sessions dashboard page |
|
||||
| `openclaw://dashboard/channels` | Open the channels dashboard page |
|
||||
| `openclaw://dashboard/skills` | Open the skills dashboard page |
|
||||
| `openclaw://dashboard/cron` | Open the cron dashboard page |
|
||||
| `openclaw://chat` | Open the embedded Chat page |
|
||||
| `openclaw://send` | Open the Quick Send dialog |
|
||||
| `openclaw://send?message=Hello` | Open Quick Send with pre-filled text |
|
||||
| `openclaw://settings` | Open the Settings page |
|
||||
| `openclaw://setup` | Open the Setup Wizard |
|
||||
| `openclaw://commandcenter` | Open Command Center diagnostics |
|
||||
| `openclaw://activity` | Open the Activity page |
|
||||
| `openclaw://history` | Open the Activity page filtered to notification history |
|
||||
| `openclaw://healthcheck` | Run a manual health check |
|
||||
| `openclaw://check-updates` | Run a manual update check |
|
||||
| `openclaw://logs` | Open the current tray log file |
|
||||
| `openclaw://log-folder` | Open the logs folder |
|
||||
| `openclaw://config` | Open the config folder |
|
||||
| `openclaw://diagnostics` | Open the diagnostics JSONL folder |
|
||||
| `openclaw://support-context` | Copy redacted support context |
|
||||
| `openclaw://debug-bundle` | Copy a combined debug bundle for support |
|
||||
| `openclaw://browser-setup` | Copy browser.proxy/browser-control setup guidance |
|
||||
| `openclaw://port-diagnostics` | Copy gateway/browser/tunnel port diagnostics with owner PID stop hints |
|
||||
| `openclaw://capability-diagnostics` | Copy permissions, allowlist, and parity diagnostics |
|
||||
| `openclaw://node-inventory` | Copy node capabilities, commands, and policy status |
|
||||
| `openclaw://channel-summary` | Copy channel health and start/stop availability |
|
||||
| `openclaw://activity-summary` | Copy recent tray activity for troubleshooting |
|
||||
| `openclaw://extensibility-summary` | Copy channel, skills, and cron dashboard surface guidance |
|
||||
| `openclaw://restart-ssh-tunnel` | Restart the tray-managed SSH tunnel when enabled |
|
||||
| `openclaw://agent?message=Hello` | Send a message directly to the connected gateway |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Tray icon doesn't appear
|
||||
|
||||
1. Check Task Manager for `OpenClaw.Tray.WinUI.exe` — if it's running, the icon may be hidden.
|
||||
2. Drag the icon out of the hidden overflow area to always show it.
|
||||
3. If the process isn't running, try launching from Start Menu → **OpenClaw Tray**.
|
||||
|
||||
### "WebView2 Runtime is missing" error
|
||||
|
||||
Download and install WebView2 from [Microsoft](https://developer.microsoft.com/microsoft-edge/webview2/). The **Evergreen Standalone Installer** is the easiest option.
|
||||
|
||||
### Can't connect to gateway
|
||||
|
||||
- Verify the gateway URL in Settings (default: `ws://localhost:18789`).
|
||||
- Make sure the OpenClaw gateway process is running.
|
||||
- Check Windows Firewall — if your gateway runs on a different machine, allow inbound traffic on port 18789.
|
||||
- See the log at `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log` for connection errors.
|
||||
|
||||
### "Not yet paired" message on reconnect
|
||||
|
||||
If the tray shows **Pending approval** after reconnecting, run the approval command shown in the tray or log:
|
||||
|
||||
```
|
||||
openclaw devices approve <device-id>
|
||||
```
|
||||
|
||||
See [issue #81](https://github.com/openclaw/openclaw-windows-node/issues/81) for context on this flow.
|
||||
|
||||
### Setup code doesn't work
|
||||
|
||||
- Make sure you paste the **entire** setup code — it's a single base64url-encoded string.
|
||||
- Check for accidental leading/trailing whitespace.
|
||||
- The code must be from a compatible gateway version. Try entering the gateway URL and token manually instead.
|
||||
|
||||
### Connection test fails
|
||||
|
||||
- Verify the gateway URL is correct (e.g., `ws://localhost:18789` for local, or the full URL for remote).
|
||||
- Check that your token is valid and hasn't expired.
|
||||
- If the gateway is on another machine, ensure Windows Firewall allows traffic on the gateway port.
|
||||
- See the log at `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log` for detailed error messages.
|
||||
|
||||
### Wizard shows "offline"
|
||||
|
||||
The Wizard screen relies on the gateway's wizard protocol. If it shows offline:
|
||||
- The gateway may not support wizard mode yet — this is fine, configuration can be done later.
|
||||
- Check that the gateway is running and reachable.
|
||||
- You can skip the Wizard screen and configure your gateway manually from the tray menu → Settings.
|
||||
|
||||
### Settings are not saved
|
||||
|
||||
Settings are stored at `%APPDATA%\OpenClawTray\settings.json`. If this file is corrupt, delete it and reconfigure from scratch.
|
||||
|
||||
### Auto-start isn't working
|
||||
|
||||
1. Open Settings and toggle **Start with Windows** off, then on again.
|
||||
2. Check `HKCU\Software\Microsoft\Windows\CurrentVersion\Run` for an `OpenClawTray` entry.
|
||||
|
||||
## Updating
|
||||
|
||||
OpenClaw Tray checks for updates automatically and shows a notification when a new version is available. Click **Update** to download and apply the update. You can also check manually by visiting the [Releases page](https://github.com/openclaw/openclaw-windows-node/releases) and downloading the latest installer.
|
||||
|
||||
## Uninstalling
|
||||
|
||||
Go to **Settings → Apps → Installed apps**, find **OpenClaw Tray**, and click **Uninstall**. Alternatively, use **Programs and Features** in the Control Panel.
|
||||
|
||||
Your settings file at `%APPDATA%\OpenClawTray\settings.json` and device key at `%LOCALAPPDATA%\OpenClawTray\device-key-ed25519.json` are not removed automatically — delete them manually if you want a clean uninstall.
|
||||
@ -1,17 +1,17 @@
|
||||
# Test Coverage Summary
|
||||
|
||||
**571 tests total** (478 shared + 93 tray) — all passing ✅
|
||||
**1570 tests total** (1182 shared + 388 tray) — all passing ✅
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total Tests | 571 |
|
||||
| Passing | 571 (100%) |
|
||||
| Total Tests | 1570 |
|
||||
| Passing | 1570 (100%) |
|
||||
| Failing | 0 |
|
||||
| Framework | xUnit 2.9.3 / .NET 10.0 |
|
||||
|
||||
## Test Projects
|
||||
|
||||
### OpenClaw.Shared.Tests — 478 tests
|
||||
### OpenClaw.Shared.Tests — 1182 tests
|
||||
|
||||
#### ModelsTests
|
||||
- **AgentActivityTests** (~15) — glyph mapping for all ActivityKind values, display text formatting
|
||||
@ -71,29 +71,26 @@
|
||||
|
||||
---
|
||||
|
||||
### OpenClaw.Tray.Tests — 93 tests
|
||||
### OpenClaw.Tray.Tests — 388 tests
|
||||
|
||||
#### MenuDisplayHelperTests (~40)
|
||||
- `GetStatusIcon` — emoji mapping for Connected/Disconnected/Connecting/Error states
|
||||
- `GetChannelStatusIcon` — status icons for running/idle/pending/error/disconnected + case-insensitive variants
|
||||
- `GetNextToggleValue` — ON↔OFF toggling, case handling
|
||||
- Unknown/empty status fallback
|
||||
#### Core Tray Tests
|
||||
|
||||
#### MenuPositionerTests (~15)
|
||||
- Screen edge clamping (top-left, bottom-right)
|
||||
- Taskbar-at-right scenario
|
||||
- Menu positioning relative to cursor
|
||||
- **MenuDisplayHelperTests** (~40) — `GetStatusIcon` emoji mapping for Connected/Disconnected/Connecting/Error states, `GetChannelStatusIcon` status icons for running/idle/pending/error/disconnected + case-insensitive variants, `GetNextToggleValue` ON↔OFF toggling, unknown/empty status fallback
|
||||
- **MenuPositionerTests** (~15) — Screen edge clamping (top-left, bottom-right), taskbar-at-right scenario, menu positioning relative to cursor
|
||||
- **SettingsRoundTripTests** (~15) — Serialization/deserialization round trips, default values on missing keys, backward compatibility with older settings formats
|
||||
- **DeepLinkParserTests** (~23) — `ParseDeepLink` protocol validation, null/empty handling, subpath parsing, trailing slash stripping, query parameter extraction, URL-encoded message handling
|
||||
|
||||
#### SettingsRoundTripTests (~15)
|
||||
- Serialization/deserialization round trips
|
||||
- Default values on missing keys
|
||||
- Backward compatibility with older settings formats
|
||||
#### Onboarding Tests
|
||||
|
||||
#### DeepLinkParserTests (~23)
|
||||
- `ParseDeepLink` — protocol validation, null/empty handling, subpath parsing, trailing slash stripping
|
||||
- Query parameter extraction (`GetQueryParam`)
|
||||
- URL-encoded message handling
|
||||
- Multiple query parameters, missing keys
|
||||
- **OnboardingStateTests** (19) — Page order, mode logic, route changes, wizard state persistence, completion, disposal
|
||||
- **GatewayChatHelperTests** (11) — URL scheme conversion, token encoding, localhost checks, session keys
|
||||
- **LocalGatewayApproverTests** (13) — IsLocalGateway for localhost/remote/edge cases
|
||||
- **SetupCodeDecoderTests** (14) — Base64url decode, size limits, JSON validation, URL/token extraction
|
||||
- **GatewayHealthCheckTests** (6) — Health URI building, scheme conversion, port preservation
|
||||
- **SecurityValidationTests** (16) — Locale whitelist, port range, path traversal, URI scheme validation
|
||||
- **WizardStepParsingTests** (12) — JSON step parsing, options, completion, sensitive fields
|
||||
- **GatewayDiscoveryServiceTests** — mDNS host selection and connection URL regression coverage
|
||||
- **LocalizationValidationTests** — locale key parity, onboarding key presence, duplicate detection, and all-or-none translation consistency
|
||||
|
||||
---
|
||||
|
||||
@ -110,6 +107,9 @@ dotnet test tests/OpenClaw.Tray.Tests
|
||||
# Specific test class
|
||||
dotnet test --filter "FullyQualifiedName~MenuDisplayHelperTests"
|
||||
|
||||
# Onboarding tests only
|
||||
dotnet test --filter "FullyQualifiedName~Onboarding"
|
||||
|
||||
# Verbose output
|
||||
dotnet test --logger "console;verbosity=detailed"
|
||||
```
|
||||
@ -120,9 +120,10 @@ dotnet test --logger "console;verbosity=detailed"
|
||||
- Real gateway message parsing
|
||||
- Concurrent event handling
|
||||
- File I/O and thread synchronization
|
||||
- End-to-end onboarding wizard flow (requires the WebView2 runtime)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: 2026-03-18
|
||||
**Last Updated**: 2026-05-04
|
||||
**Framework**: xUnit 2.9.3 / .NET 10.0
|
||||
**Status**: ✅ 571 tests passing
|
||||
**Status**: ✅ 1570 tests passing
|
||||
|
||||
@ -38,7 +38,7 @@ Related issues: #5 (Canvas Panel), #6 (Skills Settings UI), #7 (DEVELOPMENT.md),
|
||||
| `OpenClaw.Shared` | ✅ Working | Gateway WebSocket client library (.NET) |
|
||||
| `OpenClaw.Tray.WinUI` | ✅ Working | System tray app — status, Quick Send, WebChat (WebView2), toast notifications, channel control |
|
||||
| `OpenClaw.CommandPalette` | ✅ Working | PowerToys extension for quick commands |
|
||||
| Windows Node | ✅ Implemented | Canvas, screen, camera, system.run, notifications — all working via Node Mode |
|
||||
| Windows Node | ✅ Implemented | Canvas, screen, camera, location, device info/status, system.run, notifications — all working via Node Mode |
|
||||
| Windows Gateway | ❌ Unexplored | Gateway runs in WSL2 only |
|
||||
|
||||
### How Scott uses it today
|
||||
@ -142,7 +142,7 @@ The gold standard. Everything works out of the box. This is what Windows should
|
||||
| **Setup complexity** | Medium — WSL2 + openclaw + configure tray app to point at `ws://localhost:18789` |
|
||||
| **UX Rating** | ⭐⭐⭐ Nice UI wrapper but agent still can't see or interact with Windows |
|
||||
|
||||
This is what the tray app provides *today*. Quick Send, embedded WebChat, status display. But it's a viewport into the agent, not a bridge for the agent to interact with Windows.
|
||||
This operator-only mode provides Quick Send, embedded WebChat, Command Center diagnostics, activity stream, and status display. But without Node Mode it is still a viewport into the agent, not a bridge for the agent to interact with Windows.
|
||||
|
||||
---
|
||||
|
||||
@ -152,13 +152,15 @@ This is what the tray app provides *today*. Quick Send, embedded WebChat, status
|
||||
|--------|---------|
|
||||
| **Gateway** | WSL2 (Ubuntu) |
|
||||
| **Nodes** | OpenClaw.Tray registers as `role: "node"` from Windows |
|
||||
| **Capabilities** | Camera ✅ (MediaCapture API) Canvas ✅ (WebView2) Screen ✅ (Graphics Capture) Notifications ✅ (Toast + agent-driven) Browser ❌ (WSL2 browser proxy) Exec ✅ (WSL2 + optionally Windows `cmd`/`powershell`) Location ⚠️ (Windows Location API — desktop, less useful) Audio/TTS ✅ (Windows Speech) |
|
||||
| **Capabilities** | Camera ✅ (MediaCapture API) Canvas ✅ (WebView2) Screen ✅ (Graphics Capture) Notifications ✅ (Toast + agent-driven) Browser ✅/⚠️ (local `browser.proxy` bridge; requires browser-control host on gateway port + 2) Exec ✅ (WSL2 + optionally Windows `cmd`/`powershell`) Location ⚠️ (Windows Location API — desktop, less useful) Voice/TTS ⚠️ (separate parity track) |
|
||||
| **Networking** | WSL2 NAT still involved for gateway, but tray app connects outward to WSL2's WS — simpler direction. |
|
||||
| **Setup complexity** | Medium — WSL2 gateway + tray app auto-discovers and pairs |
|
||||
| **UX Rating** | ⭐⭐⭐⭐ Agent can now see and interact with Windows! |
|
||||
|
||||
**This is the sweet spot for Phase 1.** The gateway stays in WSL2 (proven, works), but the tray app lights up all the Windows-native capabilities. The agent gains eyes and hands on Windows.
|
||||
|
||||
The tray now also has a Command Center surface that combines gateway channel health, sessions, usage/cost, node inventory, pairing state, command allowlist diagnostics, and recent invoke activity. It is read-only by default and does not invoke camera or screen commands while diagnosing capability health.
|
||||
|
||||
---
|
||||
|
||||
### Scenario 5: Windows Native Gateway + Tray App as Node ⭐⭐⭐⭐⭐
|
||||
@ -167,7 +169,7 @@ This is what the tray app provides *today*. Quick Send, embedded WebChat, status
|
||||
|--------|---------|
|
||||
| **Gateway** | Windows native (Node.js on Windows — `node.exe`) |
|
||||
| **Nodes** | OpenClaw.Tray as full Windows node |
|
||||
| **Capabilities** | Camera ✅ Canvas ✅ Screen ✅ Notifications ✅ Browser ✅ (Playwright on Windows) Exec ✅ (native `cmd.exe`, PowerShell, `wsl.exe`) Location ⚠️ Audio/TTS ✅ |
|
||||
| **Capabilities** | Camera ✅ Canvas ✅ Screen ✅ Notifications ✅ Browser ✅/⚠️ (`browser.proxy` bridge; needs browser-control host on gateway+2) Exec ✅ (native `cmd.exe`, PowerShell, `wsl.exe`) Location ⚠️ Voice/TTS ⚠️ (separate parity track) |
|
||||
| **Networking** | `ws://127.0.0.1:18789` — pure loopback, no NAT, no WSL2 networking issues |
|
||||
| **Setup complexity** | Low — `npm install -g openclaw && openclaw onboard` from PowerShell. Same as Mac. |
|
||||
| **UX Rating** | ⭐⭐⭐⭐⭐ True feature parity with Mac |
|
||||
@ -242,12 +244,13 @@ Niche scenario. If the "server" must be Windows for some reason, this works but
|
||||
|
||||
## Capability Matrix by Node Type
|
||||
|
||||
| Capability | macOS App | iOS App | Android App | WSL2 Headless | **Windows Tray (proposed)** | Windows API |
|
||||
| Capability | macOS App | iOS App | Android App | WSL2 Headless | **Windows Tray** | Windows API |
|
||||
|-----------|-----------|---------|-------------|---------------|---------------------------|-------------|
|
||||
| `canvas.present` | ✅ SwiftUI WebView | ✅ WKWebView | ✅ WebView | ❌ | **✅ WebView2** | WebView2 |
|
||||
| `canvas.snapshot` | ✅ | ✅ | ✅ | ❌ | **✅** | WebView2 CapturePreviewAsync |
|
||||
| `canvas.eval` | ✅ | ✅ | ✅ | ❌ | **✅** | WebView2 ExecuteScriptAsync |
|
||||
| `canvas.a2ui` | ✅ | ✅ | ✅ | ❌ | **⚠️ Investigating** | WebView2 |
|
||||
| `canvas.a2ui.push/reset` | ✅ | ✅ | ✅ | ❌ | **✅** | WebView2 |
|
||||
| `canvas.a2ui.pushJSONL` | ✅ | ✅ | ✅ | ❌ | **✅** | Legacy alias over A2UI push |
|
||||
| `camera.snap` | ✅ AVFoundation | ✅ AVFoundation | ✅ CameraX | ❌ | **✅** | MediaCapture + frame reader fallback |
|
||||
| `camera.clip` | ✅ | ✅ | ✅ | ❌ | **✅** | MediaCapture + MediaEncoding |
|
||||
| `camera.list` | ✅ | ✅ | ✅ | ❌ | **✅** | DeviceInformation.FindAllAsync |
|
||||
@ -255,11 +258,12 @@ Niche scenario. If the "server" must be Windows for some reason, this works but
|
||||
| `system.run` | ✅ | ❌ | ❌ | ✅ | **✅** | Process.Start (cmd/pwsh) + ExecApprovalPolicy |
|
||||
| `system.execApprovals` | ❌ | ❌ | ❌ | ❌ | **✅** | JSON policy file (exec-policy.json) |
|
||||
| `system.notify` | ✅ NSUserNotification | ✅ UNUserNotification | ✅ NotificationManager | ❌ | **✅** | ToastNotificationManager |
|
||||
| `location.get` | ✅ CLLocationManager | ✅ CLLocationManager | ✅ FusedLocation | ❌ | **⚠️** | Windows.Devices.Geolocation |
|
||||
| `location.get` | ✅ CLLocationManager | ✅ CLLocationManager | ✅ FusedLocation | ❌ | **✅** | Windows.Devices.Geolocation |
|
||||
| `device.info/status` | ✅ shared schema | ✅ shared schema | ✅ shared schema | ❌ | **✅** | .NET runtime, storage, network |
|
||||
| `sms.send` | ❌ | ❌ | ✅ | ❌ | ❌ | N/A |
|
||||
| Browser proxy | ✅ | ❌ | ❌ | ✅ Playwright | **⚠️ Future** | Playwright on Windows |
|
||||
| Browser proxy | ✅ | ❌ | ❌ | ✅ Playwright | **✅/⚠️ Local bridge** | Browser-control host on gateway port + 2 |
|
||||
| Accessibility | ✅ AX API | ❌ | ❌ | ❌ | **⚠️ Future** | UI Automation |
|
||||
| Speech/TTS | ✅ NSSpeechSynthesizer | ❌ | ❌ | ❌ | **✅** | Windows.Media.SpeechSynthesis |
|
||||
| Speech/TTS | ✅ NSSpeechSynthesizer | ❌ | ❌ | ❌ | **⚠️ Planned** | Windows.Media.SpeechSynthesis |
|
||||
| Microphone | ✅ AVAudioEngine | ✅ | ✅ | ❌ | **⚠️ Future** | Windows.Media.Audio |
|
||||
|
||||
---
|
||||
@ -270,7 +274,7 @@ For contributors: here's what implementing a Windows node means at the protocol
|
||||
|
||||
### 1. Connect as a node
|
||||
|
||||
The tray app's `OpenClawGatewayClient` currently connects as an **operator**. To become a node, it needs to send (or send an additional) `connect` with `role: "node"`:
|
||||
The tray app uses a dedicated node connection (`WindowsNodeClient`) with `role: "node"`:
|
||||
|
||||
```json
|
||||
{
|
||||
@ -288,15 +292,18 @@ The tray app's `OpenClawGatewayClient` currently connects as an **operator**. To
|
||||
},
|
||||
"role": "node",
|
||||
"scopes": [],
|
||||
"caps": ["canvas", "camera", "screen", "notifications", "system"],
|
||||
"caps": ["canvas", "camera", "screen", "notifications", "system", "device", "browser"],
|
||||
"commands": [
|
||||
"canvas.present", "canvas.hide", "canvas.navigate",
|
||||
"canvas.eval", "canvas.snapshot", "canvas.a2ui.push",
|
||||
"canvas.a2ui.reset",
|
||||
"canvas.a2ui.pushJSONL", "canvas.a2ui.reset",
|
||||
"camera.list", "camera.snap", "camera.clip",
|
||||
"screen.record",
|
||||
"system.run", "system.notify",
|
||||
"system.execApprovals.get", "system.execApprovals.set"
|
||||
"screen.snapshot", "screen.record",
|
||||
"location.get",
|
||||
"device.info", "device.status",
|
||||
"system.run", "system.run.prepare", "system.which", "system.notify",
|
||||
"system.execApprovals.get", "system.execApprovals.set",
|
||||
"browser.proxy"
|
||||
],
|
||||
"permissions": {
|
||||
"camera.capture": true,
|
||||
@ -469,6 +476,10 @@ var stream = await synth.SynthesizeTextToStreamAsync(text);
|
||||
// Play via MediaElement or save to file
|
||||
```
|
||||
|
||||
This is a candidate implementation path, not an implemented node command yet. Voice/Talk mode parity should stay on its own track so Windows does not advertise a speech capability before there is a shared command contract and permission model.
|
||||
|
||||
Current PR review status: open PR #120 (`feature/voice-mode`) is a useful prototype but should not merge as-is. It currently conflicts with the active capability-settings branch, advertises `voice.*` commands without the default-off Settings gate used for other privacy-sensitive capability groups, widens operator scopes in the same PR, persists cloud TTS provider keys in plain settings JSON, and introduces a Windows-specific wire schema before the Mac runtime/controller/session contract is agreed. Safe next step: split schema, gateway scope, chat transport, Windows runtime, WebChat integration, and cloud-provider credentials into separate reviews; keep the first merge behind a default-off Voice Settings group and gateway dangerous-command allowlist.
|
||||
|
||||
---
|
||||
|
||||
## Architectural Questions
|
||||
@ -513,7 +524,7 @@ On macOS: launchd plist. On Linux: systemd unit. On Windows, options include:
|
||||
- **Startup folder** (simplest, least robust)
|
||||
- **Tray app manages gateway process** (like macOS menubar app can start/stop gateway)
|
||||
|
||||
The Mac menubar app has "Gateway start/stop/restart" in its menu. The tray app has this marked as ❌ in the parity table. If the gateway runs on Windows, the tray app could manage it.
|
||||
The Mac menubar app has "Gateway start/stop/restart" in its menu. Windows Command Center can restart a tray-managed SSH tunnel, but it intentionally does not stop or kill externally managed gateway processes. If the gateway runs as a future Windows-managed process, the tray app could add explicit start/stop/restart controls for that owned process.
|
||||
|
||||
### 4. WSL2 networking: the NAT problem
|
||||
|
||||
@ -564,10 +575,11 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
- [x] `system.notify` — agent can request Windows toast notifications
|
||||
- [x] `canvas.present` / `canvas.hide` — floating WebView2 canvas window
|
||||
- [x] `canvas.navigate` / `canvas.eval` / `canvas.snapshot` — full canvas support
|
||||
- [ ] `canvas.a2ui.push` / `canvas.a2ui.reset` — A2UI rendering (investigating: agent tool policy blocks)
|
||||
- [x] `canvas.a2ui.push` / `canvas.a2ui.pushJSONL` / `canvas.a2ui.reset` — A2UI rendering
|
||||
- [x] `device.info` / `device.status` — metadata and lightweight status payloads
|
||||
- [x] `system.run` — exec commands on Windows (PowerShell/cmd) with ICommandRunner abstraction
|
||||
- [x] `system.execApprovals.get/set` — remote-manageable exec approval policy
|
||||
- [ ] Settings UI for node capabilities (enable/disable camera, screen, etc.)
|
||||
- [x] Settings UI for node capabilities (enable/disable canvas, screen, camera, location, browser proxy)
|
||||
- [x] Resolve #9 (WebView2 ARM64) — required for canvas
|
||||
|
||||
**Depends on:** #5 (Canvas Panel), #9 (WebView2 ARM64)
|
||||
@ -577,10 +589,9 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
|
||||
- [x] `camera.list` — enumerate Windows cameras (DeviceInformation.FindAllAsync)
|
||||
- [x] `camera.snap` — capture photo from webcam (MediaCapture + frame reader fallback)
|
||||
- [ ] `camera.clip` — record short video clip (MediaCapture + MediaEncoding)
|
||||
- [ ] `screen.record` — capture Windows desktop via Graphics Capture API
|
||||
- [x] `screen.capture` — screenshot via Windows.Graphics.Capture
|
||||
- [x] `screen.list` — enumerate monitors with bounds/working area
|
||||
- [x] `camera.clip` — record short video clip (MediaCapture + MediaEncoding)
|
||||
- [x] `screen.record` — capture Windows desktop via Graphics Capture API
|
||||
- [x] `screen.snapshot` — screenshot via Windows.Graphics.Capture
|
||||
- [x] Permission prompts (camera: UnauthorizedAccessException → toast; future MSIX consent)
|
||||
- [x] Multi-monitor support for screen capture (`screenIndex` param)
|
||||
|
||||
@ -598,13 +609,15 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
### Phase 4: Feature Parity + Polish
|
||||
**Priority: LOW | Effort: Medium | Impact: Medium**
|
||||
|
||||
- [ ] `location.get` — Windows Location API
|
||||
- [x] `location.get` — Windows Location API
|
||||
- [ ] TTS / Speech Synthesis
|
||||
- [ ] Microphone / voice input
|
||||
- [ ] Browser proxy (Playwright on Windows, launched by tray app)
|
||||
- [x] `browser.proxy` — local browser-control bridge on gateway port + 2, including SSH companion-forward diagnostics
|
||||
- [x] Browser-control host setup guidance and local host runtime smoke for end-to-end browser smoke tests
|
||||
- [ ] Bundled browser-control host installer/launcher
|
||||
- [ ] UI Automation (Windows equivalent of macOS Accessibility API)
|
||||
- [ ] Auto-update improvements (current auto-update from GitHub Releases → MSI/MSIX?)
|
||||
- [ ] PowerToys Command Palette integration for node commands
|
||||
- [x] PowerToys Command Palette integration for Command Center diagnostics entrypoint
|
||||
|
||||
---
|
||||
|
||||
@ -614,25 +627,27 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
|
||||
```
|
||||
OpenClaw.Shared/
|
||||
├── OpenClawGatewayClient.cs ← existing operator client
|
||||
├── OpenClawNodeClient.cs ← NEW: node protocol handler
|
||||
├── INodeCommandHandler.cs ← NEW: interface for command dispatch
|
||||
├── NodeIdentity.cs ← NEW: keypair + device ID
|
||||
└── Models/
|
||||
├── NodeConnectParams.cs ← NEW
|
||||
├── NodeInvokeRequest.cs ← NEW
|
||||
└── NodeInvokeResponse.cs ← NEW
|
||||
├── OpenClawGatewayClient.cs ← operator client
|
||||
├── WindowsNodeClient.cs ← node protocol handler
|
||||
├── DeviceIdentity.cs ← Ed25519 keypair + device token
|
||||
├── NodeCapabilities.cs ← command/capability interfaces
|
||||
└── Capabilities/
|
||||
├── CanvasCapability.cs
|
||||
├── CameraCapability.cs
|
||||
├── ScreenCapability.cs
|
||||
├── LocationCapability.cs
|
||||
└── SystemCapability.cs
|
||||
|
||||
OpenClaw.Tray/
|
||||
OpenClaw.Tray.WinUI/
|
||||
├── Services/
|
||||
│ ├── NodeService.cs ← NEW: orchestrates node connection
|
||||
│ ├── CanvasService.cs ← NEW: handles canvas.* commands
|
||||
│ ├── CameraService.cs ← NEW: handles camera.* commands
|
||||
│ ├── ScreenService.cs ← NEW: handles screen.* commands
|
||||
│ ├── SystemService.cs ← NEW: handles system.* commands
|
||||
│ └── ExecApprovals.cs ← NEW: local approval store
|
||||
│ ├── NodeService.cs ← orchestrates node connection
|
||||
│ ├── CameraCaptureService.cs
|
||||
│ ├── ScreenCaptureService.cs
|
||||
│ ├── ScreenRecordingService.cs
|
||||
│ ├── LocalCommandRunner.cs
|
||||
│ └── SettingsManager.cs
|
||||
├── Windows/
|
||||
│ ├── CanvasWindow.xaml ← NEW: floating WebView2 canvas
|
||||
│ ├── CanvasWindow.xaml ← floating WebView2 canvas
|
||||
│ └── CanvasWindow.xaml.cs
|
||||
```
|
||||
|
||||
@ -649,12 +664,15 @@ Tray App Start
|
||||
│ └─ (existing functionality)
|
||||
│
|
||||
└─ Connect WS #2: role=node
|
||||
├─ Advertise caps: [canvas, camera, screen, system, notifications]
|
||||
├─ Advertise commands: [canvas.*, camera.*, screen.*, system.*]
|
||||
├─ Advertise caps: [canvas, camera, location, screen, system]
|
||||
├─ Advertise commands: [canvas.*, camera.*, location.get, screen.*, system.*]
|
||||
├─ Handle node.invoke requests
|
||||
│ ├─ canvas.present → show/navigate CanvasWindow
|
||||
│ ├─ canvas.snapshot → WebView2 CapturePreview
|
||||
│ ├─ camera.snap → MediaCapture → JPEG → base64
|
||||
│ ├─ camera.clip → MediaCapture → MP4 → base64
|
||||
│ ├─ location.get → Windows.Devices.Geolocation
|
||||
│ ├─ screen.snapshot → GraphicsCapture → image base64
|
||||
│ ├─ screen.record → GraphicsCapture → MP4 → base64
|
||||
│ ├─ system.run → Process.Start → stdout/stderr
|
||||
│ └─ system.notify → ToastNotification
|
||||
@ -669,29 +687,29 @@ This is a big effort and **contributions are very welcome!** Here's how to get s
|
||||
|
||||
### Good First Issues
|
||||
|
||||
1. **Device identity module** — Generate Ed25519 keypair, store in `%APPDATA%`, derive fingerprint. Pure crypto, well-defined scope.
|
||||
2. **`system.notify` handler** — Accept title + body + priority, show a Windows toast. The tray app already shows toasts — this just adds the node protocol wrapper.
|
||||
3. **`system.run` handler** — Execute a command via `Process.Start`, return stdout/stderr/exit code. Add exec approvals.
|
||||
1. **Capability diagnostics copy** — ✅ Command Center can copy a summary of declared commands, gateway allowlist status, and dangerous-command opt-ins.
|
||||
2. **Gateway health summary** — Show version, update state, auth state, and active connection health in one panel.
|
||||
3. **Channel status cards** — Surface configured/running/error/probe state for channels.
|
||||
|
||||
### Medium Issues
|
||||
|
||||
4. **Node protocol client** (`OpenClawNodeClient`) — WebSocket connect with `role: "node"`, handle `node.invoke` dispatch. Builds on the existing `OpenClawGatewayClient`.
|
||||
5. **Canvas floating window** — WebView2 in a borderless/floating window that appears on `canvas.present` and hides on `canvas.hide`. Related: #5.
|
||||
4. **Browser proxy parity** — Windows now includes a Mac-compatible local `browser.proxy` bridge to the browser control host on gateway port + 2, and managed SSH tunnel mode forwards local+2 to remote+2 when the browser proxy capability is enabled; continue hardening live browser-host setup guidance and diagnostics.
|
||||
5. **Gateway/channel flyout** — Show configured/running/error/probe state for channels and gateway health in the tray.
|
||||
|
||||
### Harder Issues
|
||||
|
||||
6. **Camera capture** — `Windows.Media.Capture` for photos and video clips. Handle permissions, multiple cameras, front/back mapping.
|
||||
7. **Screen recording** — `Windows.Graphics.Capture` for screen recording. Handle multi-monitor, permission consent, encoding to MP4.
|
||||
8. **Native Windows gateway audit** — Run `openclaw gateway` on Windows, identify and fix platform-specific failures.
|
||||
6. **Voice mode parity** — PR #120 has been reviewed and should stay blocked until it is rebased/split, gated default-off through Settings, aligned with a shared Mac/gateway voice command contract, and hardened for credential storage and permission prompts.
|
||||
7. **Native Windows gateway audit** — Run `openclaw gateway` on Windows, identify and fix platform-specific failures.
|
||||
8. **Richer channel operations** — Add tray surfaces for channel configuration, probe status, token source, last error, and recovery actions.
|
||||
|
||||
### Development Setup
|
||||
|
||||
See #7 / #8 for DEVELOPMENT.md. Quick start:
|
||||
```bash
|
||||
See `DEVELOPMENT.md`. Quick start:
|
||||
```powershell
|
||||
git clone https://github.com/shanselman/openclaw-windows-hub.git
|
||||
cd openclaw-windows-hub
|
||||
dotnet build
|
||||
dotnet run --project src/OpenClaw.Tray
|
||||
.\build.ps1
|
||||
dotnet run --project src\OpenClaw.Tray.WinUI\OpenClaw.Tray.WinUI.csproj
|
||||
```
|
||||
|
||||
Requires .NET 10.0 SDK, Windows 10/11. For testing the node protocol, you'll need a running OpenClaw gateway (in WSL2 or on another machine).
|
||||
@ -700,12 +718,9 @@ Requires .NET 10.0 SDK, Windows 10/11. For testing node protocol, you'll need a
|
||||
|
||||
## Open Questions
|
||||
|
||||
- [ ] Does the gateway protocol support dual-role connections, or must we open two WebSockets?
|
||||
- [ ] What's the minimum `PROTOCOL_VERSION` the node connect needs? (Currently 3)
|
||||
- [ ] Should exec from a Windows node default to PowerShell or cmd.exe?
|
||||
- [ ] How should the tray app handle "node in background" — Windows can suspend tray apps. Do we need a background service?
|
||||
- [ ] Can the Graphics Capture API work without a visible window / user picker? (Background capture requires Windows 11+)
|
||||
- [ ] Should we pursue MSIX packaging for the tray app to unlock restricted capabilities?
|
||||
- [x] Should dangerous command opt-ins be shown in the tray as a guided repair flow, a docs link, or both? Command Center now shows copyable safety guidance but intentionally avoids one-click dangerous repair commands.
|
||||
- [ ] How much channel management should live in the native tray versus opening the web dashboard?
|
||||
- [x] Should Voice Mode land as a separate parity track after the open PR is reviewed against current Mac architecture? Yes. PR #120 should not advertise voice commands from Windows until the shared contract, Settings gate, gateway allowlist, and credential-storage concerns are resolved.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
## Overview
|
||||
|
||||
The Windows Node feature allows the tray app to receive commands from the OpenClaw agent (canvas, screenshots, notifications). This is **experimental** and must be explicitly enabled in Settings.
|
||||
The Windows Node feature allows the tray app to receive commands from the OpenClaw agent (canvas, screenshots, screen recordings, camera, location, notifications, and controlled command execution). This is **experimental** and must be explicitly enabled in Settings.
|
||||
|
||||
## How to Enable
|
||||
|
||||
@ -25,8 +25,8 @@ The Windows Node feature allows the tray app to receive commands from the OpenCl
|
||||
```
|
||||
[INFO] Starting Windows Node connection to ws://...
|
||||
[INFO] Node connected, waiting for challenge...
|
||||
[INFO] Sent node registration with X capabilities, Y commands
|
||||
[INFO] Node registered successfully!
|
||||
[INFO] Registered capability: screen (2 commands)
|
||||
[INFO] All capabilities registered
|
||||
[INFO] Node status: Connected
|
||||
```
|
||||
|
||||
@ -34,6 +34,12 @@ The Windows Node feature allows the tray app to receive commands from the OpenCl
|
||||
- When the agent captures your screen, you should see a "📸 Screen Captured" toast
|
||||
- This is throttled to max once per 10 seconds
|
||||
|
||||
### 4. Command Center
|
||||
- Open the tray status detail or launch `openclaw://commandcenter`
|
||||
- In Node Mode, verify the window shows gateway channel health from node `health` events plus a synthesized local Windows node when operator `node.list` is not connected
|
||||
- Check diagnostics for pairing approval, stale health, all-stopped channels, allowlist filtering, browser control host availability for `browser.proxy`, and usage-cost gaps
|
||||
- Use "Copy fix" only for safe repair commands; privacy-sensitive commands remain informational unless you explicitly opt in on the gateway
|
||||
|
||||
## What Requires Gateway Support
|
||||
|
||||
These features need the gateway to send `node.invoke` commands:
|
||||
@ -44,24 +50,38 @@ These features need the gateway to send `node.invoke` commands:
|
||||
| `canvas.hide` | Hide canvas window | Closes the canvas window |
|
||||
| `canvas.eval` | Execute JavaScript | Runs JS in canvas, returns result |
|
||||
| `canvas.snapshot` | Capture canvas | Returns base64 PNG of canvas content |
|
||||
| `screen.capture` | Take screenshot | Captures screen, shows notification, returns base64 |
|
||||
| `screen.list` | List monitors | Returns array of monitor info |
|
||||
| `canvas.a2ui.pushJSONL` | Legacy A2UI JSONL push | Routes through same renderer path as `canvas.a2ui.push` |
|
||||
| `screen.snapshot` | Take screenshot | Captures screen, shows notification, returns base64 |
|
||||
| `screen.record` | Record short screen clip | Returns MP4/base64 metadata; requires explicit gateway allowlist |
|
||||
| `system.notify` | Show notification | Displays toast notification |
|
||||
| `system.run` / `system.which` | Controlled command execution | Uses local exec approval policy; `prompt` decisions show a Windows Allow once / Always allow / Deny dialog |
|
||||
| `camera.list` | Enumerate cameras | Returns device IDs and names |
|
||||
| `camera.snap` | Capture photo | Returns base64 image (NV12 fallback) |
|
||||
| `camera.clip` | Capture video clip | Returns MP4/base64 metadata |
|
||||
| `location.get` | Get Windows location | Uses Windows location permission/settings |
|
||||
| `device.info` / `device.status` | Device metadata/status | Returns host/app/locale plus battery/storage/network/uptime payloads |
|
||||
| `browser.proxy` | Proxy browser-control host requests | Requires Browser proxy bridge enabled, a compatible browser-control host listening on gateway port + 2, and matching browser-control auth |
|
||||
| `stt.transcribe` | Speech-to-text from default microphone | Default-off; bounded `maxDurationMs` ≤ 30000; concatenates phrases until duration elapses; requires explicit gateway allowlist |
|
||||
| `tts.speak` | Speak text aloud | Requires Text-to-speech playback enabled in Settings; gateway mode also requires `tts.speak` in `gateway.nodes.allowCommands` |
|
||||
|
||||
## Capabilities Advertised
|
||||
|
||||
When the node connects, it advertises these capabilities:
|
||||
- `canvas` - WebView2-based canvas window
|
||||
- `screen` - Screen capture via GDI
|
||||
- `screen` - Screen snapshot and recording via Windows.Graphics.Capture
|
||||
- `system` - Notifications, command execution (`system.run`, `system.run.prepare`, `system.which`), exec approval policy
|
||||
- `camera` - MediaCapture photo capture (frame reader fallback)
|
||||
- `camera` - MediaCapture photo/video capture (frame reader fallback)
|
||||
- `location` - Windows.Devices.Geolocation
|
||||
- `device` - Host/app metadata and lightweight status
|
||||
- `browser` - Local `browser.proxy` bridge to a browser-control host on gateway port + 2, when enabled in Settings
|
||||
- `tts` - Windows speech synthesis or ElevenLabs playback, when enabled in Settings
|
||||
|
||||
## Security Features
|
||||
|
||||
- **URL Validation**: Canvas blocks `file://`, `javascript:`, localhost, private IPs, IPv6 localhost
|
||||
- **Screen Capture Notification**: User is notified when screen is captured
|
||||
- **Screen Capture Notification**: User is notified when screen snapshots are captured
|
||||
- **Screen Recording Allowlist**: `screen.record` must be explicitly allowed by the gateway and does not leave a hidden local MP4 copy on Windows
|
||||
- **Command Center Redaction**: recent node invoke activity records command name, status, duration, node id, and privacy class only; it does not store base64 payloads, screenshots, recordings, tokens, or command arguments
|
||||
- **Node Mode Toggle**: Must be explicitly enabled by user
|
||||
- **Command Validation**: Only alphanumeric commands with dots/hyphens allowed
|
||||
|
||||
@ -76,6 +96,15 @@ When the node connects, it advertises these capabilities:
|
||||
- Ensure Windows notifications are enabled for the app
|
||||
- Check if notification settings in the app are enabled
|
||||
|
||||
### `browser.proxy` reports no browser-control host
|
||||
- Confirm the Browser proxy bridge toggle is enabled in Settings, then save and reconnect or re-pair if the gateway keeps an older command snapshot.
|
||||
- The bridge is local-only: it calls `http://127.0.0.1:<gateway-port+2>` from Windows. For a gateway on `ws://127.0.0.1:18789`, the browser-control host must listen on `127.0.0.1:18791`.
|
||||
- In managed SSH tunnel mode, keep Browser proxy bridge enabled so the tray forwards local gateway port + 2 to remote gateway port + 2. Settings shows a selectable preview of the exact `ssh -N -L ...` command.
|
||||
- If using a manual SSH tunnel, add both forwards, for example: `ssh -N -L 18789:127.0.0.1:18789 -L 18791:127.0.0.1:18791 <user>@<host>`. If local and remote gateway ports differ, forward `<local-gateway-port+2>` to `127.0.0.1:<remote-gateway-port+2>`.
|
||||
- A local SSH forward is not enough if the remote browser-control host is not running. Command Center port diagnostics should show whether the local gateway and browser-control ports are listening and which process owns them.
|
||||
- If Command Center shows the browser-control port listening but `browser.proxy` returns an auth error, verify the Windows Settings gateway token matches the browser-control host token/password. QR/bootstrap pairing can connect the node without saving a shared gateway token, but browser-control auth may still require one.
|
||||
- A local smoke test can verify the host dependency without proving gateway invoke auth: start the upstream browser-control host with a temporary no-secret config, confirm `http://127.0.0.1:<gateway-port+2>/` and `/tabs` return HTTP 200, then stop the captured host process. The full parity smoke test is not complete until `openclaw nodes invoke --command browser.proxy` succeeds through the active gateway.
|
||||
|
||||
### Canvas window doesn't appear
|
||||
- Check logs for `canvas.present` command received
|
||||
- Verify URL is not blocked by security validation
|
||||
@ -84,22 +113,60 @@ When the node connects, it advertises these capabilities:
|
||||
- If you see "Camera access blocked", enable camera access for desktop apps in Windows Privacy settings
|
||||
- Packaged MSIX builds will show the system consent prompt automatically
|
||||
|
||||
### `stt.transcribe` returns "Speech recognition failed" or "Internal Speech Error"
|
||||
- Open Windows Settings → Privacy & security → Speech (`ms-settings:privacy-speech`)
|
||||
- Turn **Online speech recognition** on. The Windows speech recognizer's default dictation grammar often fails without it, and Windows surfaces an unmapped HRESULT as "Internal Speech Error"
|
||||
- Open Windows Settings → Time & language → Language & region (`ms-settings:regionlanguage`), select your display language → Language options, and confirm **Speech** appears under Installed features (install it if not, ~50 MB; reboot or sign out/in afterward)
|
||||
- Verify the recognizer end-to-end with `ms-settings:speech` → "Microphone" → **Get started** before re-trying `stt.transcribe`
|
||||
|
||||
### `stt.transcribe` returns "Microphone permission denied"
|
||||
- Open Windows Settings → Privacy & security → Microphone
|
||||
- Ensure **Microphone access** (top-level toggle) is on
|
||||
- For **unpackaged** tray builds (the default `.\build.ps1` output): ensure **Let desktop apps access your microphone** is on. The tray exe will **not** appear as its own row — desktop-app access is granted as a group, not per-app
|
||||
- For **packaged MSIX** tray builds: the tray appears as its own entry under "Let apps access your microphone" and must be individually enabled (the OS shows a consent prompt on first use)
|
||||
- After changing permissions, re-pair the node so the gateway picks up the new advertised command
|
||||
|
||||
### `stt.transcribe` returns "Language pack 'X' is not installed"
|
||||
- Open Windows Settings → Time & language → Language & region
|
||||
- Add the requested display language and ensure the **Speech** optional feature is installed
|
||||
- Restart the tray after installing the speech pack
|
||||
|
||||
### Manual STT validation
|
||||
1. Enable Node Mode in Settings.
|
||||
2. Enable **Speech-to-text (microphone)** in Settings → Node mode.
|
||||
3. Append `stt.transcribe` to your existing gateway allowlist (do **not** copy a literal `...` — substitute the commands you already allow). For example, starting from the recommended Windows safe companion list:
|
||||
```bash
|
||||
openclaw config set gateway.nodes.allowCommands '["canvas.present","canvas.hide","canvas.navigate","canvas.eval","canvas.snapshot","canvas.a2ui.push","canvas.a2ui.pushJSONL","canvas.a2ui.reset","camera.list","location.get","screen.snapshot","device.info","device.status","system.execApprovals.get","system.execApprovals.set","stt.transcribe"]'
|
||||
openclaw gateway restart
|
||||
```
|
||||
4. Re-pair or re-approve the node so the gateway refreshes its command snapshot.
|
||||
5. Invoke and speak a short phrase:
|
||||
```bash
|
||||
openclaw nodes invoke --node <id> --command stt.transcribe \
|
||||
--params '{"maxDurationMs":5000,"language":"en-US"}'
|
||||
```
|
||||
6. The Windows microphone OS indicator should appear during recognition. Confirm that the returned payload has `transcribed:true` and includes the recognized text.
|
||||
|
||||
## Remaining Work (Roadmap)
|
||||
|
||||
1. ~~**system.run + exec approvals**~~ ✅ Implemented
|
||||
- `system.run` with PowerShell/cmd support
|
||||
- `system.run.prepare` pre-flight command
|
||||
- `system.which` command lookup
|
||||
- `system.execApprovals` allowlist flow
|
||||
2. **screen.record**
|
||||
- `system.execApprovals` allowlist flow with base-hash optimistic concurrency for remote edits
|
||||
- `system.run` environment override sanitizer blocks path/toolchain injection and secret-looking variables
|
||||
2. ~~**screen.record**~~ ✅ Implemented
|
||||
- Graphics Capture video recording (MP4/base64)
|
||||
3. **camera.clip**
|
||||
3. ~~**camera.clip**~~ ✅ Implemented
|
||||
- Short webcam video capture (MediaCapture + encoding)
|
||||
4. **A2UI end-to-end**
|
||||
- Resolve tool policy/allowlist and validate JSONL rendering
|
||||
5. **Packaging & consent prompts**
|
||||
4. ~~**A2UI pushJSONL alias + device status**~~ ✅ Implemented
|
||||
- Legacy `canvas.a2ui.pushJSONL`
|
||||
- Safe `device.info` / `device.status`
|
||||
5. ~~**Command Center diagnostics**~~ ✅ Implemented
|
||||
- Channel/node/usage/pairing/allowlist diagnostics and recent invoke timeline
|
||||
6. **Packaging & consent prompts**
|
||||
- MSIX packaging with camera/screen capabilities for system prompts
|
||||
6. **Test matrix & polish**
|
||||
7. **Test matrix & polish**
|
||||
- Canvas/screen/camera regression tests
|
||||
- Handle timeouts/disconnects, reduce verbose logging
|
||||
|
||||
@ -108,5 +175,7 @@ When the node connects, it advertises these capabilities:
|
||||
- `src/OpenClaw.Shared/WindowsNodeClient.cs` - Node protocol client
|
||||
- `src/OpenClaw.Shared/Capabilities/*.cs` - Capability handlers
|
||||
- `src/OpenClaw.Tray.WinUI/Services/NodeService.cs` - Orchestrates capabilities
|
||||
- `src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs` - GDI screen capture
|
||||
- `src/OpenClaw.Tray.WinUI/Services/ScreenCaptureService.cs` - screen snapshots
|
||||
- `src/OpenClaw.Tray.WinUI/Services/ScreenRecordingService.cs` - screen recordings
|
||||
- `src/OpenClaw.Tray.WinUI/Services/CameraCaptureService.cs` - camera photo/video capture
|
||||
- `src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml` - WebView2 canvas
|
||||
|
||||
70
docs/a2ui/README.md
Normal file
70
docs/a2ui/README.md
Normal file
@ -0,0 +1,70 @@
|
||||
# A2UI v0.8 — Overview & Implementation Grading
|
||||
|
||||
This folder is the entry point for everything A2UI in this repo. It captures
|
||||
the v0.8 specification, the standard catalog, and a side-by-side grading of
|
||||
two implementations:
|
||||
|
||||
- **Lit reference** in the OpenClaw repo (local checkout at `C:\Users\andersonch\Code\openclaw`; web components,
|
||||
rendered in a browser via the OpenClaw canvas host).
|
||||
- **Native WinUI** in this repo (`src/OpenClaw.Tray.WinUI/A2UI/`,
|
||||
branch `feat/a2ui-native-winui`).
|
||||
|
||||
The native WinUI design doc that predates this overview lives at
|
||||
[`../A2UI_NATIVE_WINUI.md`](../A2UI_NATIVE_WINUI.md); this folder
|
||||
supersedes the parts of that doc that describe the spec and adds the
|
||||
grading.
|
||||
|
||||
## Contents
|
||||
|
||||
| Doc | What's in it |
|
||||
| --- | --- |
|
||||
| [`protocol.md`](./protocol.md) | Wire protocol — envelopes, JSONL, A2A extension, capability negotiation, lifecycle |
|
||||
| [`components.md`](./components.md) | Standard catalog — every component, every property, type, enum, behavior |
|
||||
| [`data-and-actions.md`](./data-and-actions.md) | A2UIValue tagged union, data model & paths, action dispatch, security |
|
||||
| [`grading.md`](./grading.md) | Side-by-side scoring of Lit vs WinUI vs spec, with file:line citations |
|
||||
|
||||
## Spec source of truth
|
||||
|
||||
| Document | URL |
|
||||
| --- | --- |
|
||||
| Protocol v0.8 | https://a2ui.org/specification/v0.8-a2ui/ |
|
||||
| A2A extension v0.8 | https://a2ui.org/specification/v0.8-a2a-extension/ |
|
||||
| Standard catalog (JSON) | https://a2ui.org/specification/v0_8/standard_catalog_definition.json |
|
||||
| Source / schemas | https://github.com/google/A2UI |
|
||||
| Evolution v0.8 → v0.9 | https://a2ui.org/specification/v0.9-evolution-guide/ |
|
||||
|
||||
These pages were captured 2026-04-27. v0.8 is the **stable / public preview**
|
||||
release; v0.9 exists as a draft.
|
||||
|
||||
## TL;DR — how the two implementations stack up
|
||||
|
||||
| Area | Lit (OpenClaw) | WinUI (this repo) | Spec |
|
||||
| --- | --- | --- | --- |
|
||||
| Component coverage | 18/18 | 18/18 | 18 in standard catalog |
|
||||
| Component property completeness | ~85% (4 documented TODOs) | ~95% | — |
|
||||
| Streaming / JSONL parser | Per-line, lenient | Per-line, lenient + size caps | line-delimited JSON |
|
||||
| Data model paths | Custom JSON-pointer-ish + auto-parse | Strict RFC 6901 | Path strings, format underspecified |
|
||||
| Action transport | DOM `CustomEvent` bubbling | Debounced dispatcher → gateway via `agent.request` | Client-to-server A2A `userAction` |
|
||||
| Bi-directional binding | ✓ via `processor.setData` | ✓ via `DataModelStore.Write` | Spec is silent — both impls add it |
|
||||
| Markdown in `Text` | ✓ (sandboxed iframe for HTML, escaped code) | ✗ (plain text only) | Spec is silent |
|
||||
| Modal | `<dialog>` w/ `showModal()` | `ContentDialog` (native) | Spec leaves shape open |
|
||||
| List virtualization | ✗ (StackPanel-style, all-at-once) | ✓ `ItemsRepeater` + cached child template | Spec calls for it |
|
||||
| URL safety / SSRF | None — passes URLs through to `<img>`/`<video>` | HTTPS+allowlist for `Image`/`Video`/`AudioPlayer`; DNS-rebinding pin via `SocketsHttpHandler.ConnectCallback` on `Image` only — `Video`/`AudioPlayer` hand the URI to `MediaSource.CreateFromUri`, which re-resolves at playback | Spec is silent (deferred) |
|
||||
| Secret redaction | ✗ | ✓ denylist (`password`, `secret`, `token`) + registered paths | Spec is silent |
|
||||
| Action context scoping | Caller's responsibility | Explicit `dataBinding` + implicit walk + secret filter | Spec defines `context[]` only |
|
||||
| Test coverage | One model unit test; no per-component | Render matrix, scale test, security tests, integration smoke | — |
|
||||
|
||||
The detailed scorecard with deductions per category is in
|
||||
[`grading.md`](./grading.md).
|
||||
|
||||
## How to use this folder
|
||||
|
||||
- If you're **adding a renderer** for a component: read
|
||||
[`components.md`](./components.md) for the spec'd properties, then
|
||||
[`grading.md`](./grading.md) for known WinUI gaps.
|
||||
- If you're **wiring a transport** (gateway, MCP bridge, etc.): read
|
||||
[`protocol.md`](./protocol.md) and the `data-and-actions.md` action
|
||||
section.
|
||||
- If you're **reviewing a PR that touches A2UI**: skim
|
||||
[`grading.md#known-deviations-by-category`](./grading.md#known-deviations-by-category)
|
||||
to see which deviations are intentional (good) vs known gaps (bad).
|
||||
323
docs/a2ui/components.md
Normal file
323
docs/a2ui/components.md
Normal file
@ -0,0 +1,323 @@
|
||||
# A2UI v0.8 — Standard Catalog (Components)
|
||||
|
||||
Source of truth: <https://a2ui.org/specification/v0_8/standard_catalog_definition.json>.
|
||||
|
||||
The v0.8 standard catalog defines **18 components** across three loose
|
||||
categories: containers, display, interactive. A v0.8-conformant client
|
||||
MUST recognize all 18 and either render them or fall back to an "unknown"
|
||||
placeholder for catalog-strict mode.
|
||||
|
||||
Each section below is the spec — required properties first, optional
|
||||
after, with enums spelled out. Where the WinUI or Lit impl has a known gap
|
||||
or improvement, it's flagged inline so this doc doubles as a quick lookup
|
||||
when wiring a new component. Detailed grading is in
|
||||
[`grading.md`](./grading.md).
|
||||
|
||||
Notation: `BoundValue` means an [`A2UIValue`](./data-and-actions.md#a2uivalue)
|
||||
tagged union — typically `{ literalString }` or `{ path }`. `Children`
|
||||
means `{ explicitList: string[] }` or `{ template: { dataBinding, componentId } }`.
|
||||
|
||||
---
|
||||
|
||||
## Containers
|
||||
|
||||
### `Row`
|
||||
Horizontal layout container.
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `children` | `Children` | ✓ | `explicitList` or `template` |
|
||||
| `distribution` | enum | | `start` \| `center` \| `end` \| `spaceBetween` \| `spaceAround` \| `spaceEvenly` |
|
||||
| `alignment` | enum | | `start` \| `center` \| `end` \| `stretch` |
|
||||
|
||||
**Behavior**: lays children left-to-right; cross-axis = vertical alignment.
|
||||
|
||||
> WinUI: `StackPanel` (horizontal); `distribution` collapsed onto WinUI
|
||||
> `HorizontalAlignment` — `spaceBetween`/`spaceAround`/`spaceEvenly` all
|
||||
> map to `Stretch` (justify-content not natively available). Wrap to next
|
||||
> row not implemented.
|
||||
> Lit: full distribution support via CSS flex.
|
||||
|
||||
### `Column`
|
||||
Vertical layout container. Same property set as `Row`, swapping axes.
|
||||
|
||||
### `List`
|
||||
Scrollable list of children.
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `children` | `Children` | ✓ | |
|
||||
| `direction` | enum | | `vertical` (default) \| `horizontal` |
|
||||
| `alignment` | enum | | `start` \| `center` \| `end` \| `stretch` |
|
||||
|
||||
**Behavior**: virtualization-friendly; spec calls for the client to
|
||||
realize only viewport children when possible.
|
||||
|
||||
> WinUI: `ItemsRepeater` w/ `StackLayout`, virtualized, child-element
|
||||
> cache keyed by component id (preserves data-binding subscriptions
|
||||
> across recycling).
|
||||
> Lit: builds all children up-front (no virtualization).
|
||||
> Lit: `template` form for List is partially honored only because all
|
||||
> three list-bearing components share the same children resolver. WinUI
|
||||
> only supports `explicitList` today.
|
||||
|
||||
### `Card`
|
||||
Single-child container with elevation/border treatment.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `child` | component-id (string) | ✓ |
|
||||
|
||||
> WinUI: `Border` w/ `CardBackgroundFillColorDefaultBrush`,
|
||||
> `theme.CornerRadius`, padding = `theme.Spacing * 2`.
|
||||
> Lit: slot-based wrap; CSS-driven elevation.
|
||||
|
||||
### `Tabs`
|
||||
Tabbed container.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `tabItems[]` | array | ✓ |
|
||||
| `tabItems[].title` | `BoundValue<string>` | ✓ |
|
||||
| `tabItems[].child` | component-id | ✓ |
|
||||
|
||||
> WinUI: `TabView`, close buttons disabled, no reorder/drag.
|
||||
> Lit: button strip + content region; tracks `selected` index in state.
|
||||
|
||||
### `Modal`
|
||||
Click-to-open dialog.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `entryPointChild` | component-id | ✓ |
|
||||
| `contentChild` | component-id | ✓ |
|
||||
|
||||
**Behavior**: render `entryPointChild` inline; on user interaction (e.g.,
|
||||
click), open a modal containing `contentChild`. Spec leaves "what closes
|
||||
the modal" open; both impls rely on platform dismissal (Esc, click-out).
|
||||
|
||||
> WinUI: `ContentDialog` triggered by wrapping `entryPointChild` in a
|
||||
> transparent `Button`. Native modal semantics.
|
||||
> Lit: `<dialog>` element + `showModal()`.
|
||||
|
||||
### `Divider`
|
||||
Visual separator.
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `axis` | enum | | `horizontal` (default) \| `vertical` |
|
||||
|
||||
> WinUI: 1px `Rectangle`, `SystemControlForegroundBaseLowBrush`.
|
||||
> Lit: `<hr>`. **Gap**: Lit also exposes `thickness` and `color` in
|
||||
> types but doesn't apply them (root.ts:317 TODO).
|
||||
|
||||
---
|
||||
|
||||
## Display
|
||||
|
||||
### `Text`
|
||||
Text display.
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `text` | `BoundValue<string>` | ✓ | |
|
||||
| `usageHint` | enum | | `h1`–`h5`, `caption`, `body` |
|
||||
|
||||
> WinUI: `TextBlock` w/ Fluent theme styles (`TitleLarge`, `Subtitle`,
|
||||
> `BodyStrong`, `Caption`, `Body`). Plain text only.
|
||||
> Lit: **renders Markdown** via `markdown-it`. HTML blocks sandboxed in
|
||||
> `<iframe sandbox="">`; code blocks escaped. This is _beyond_ spec —
|
||||
> see [`grading.md`](./grading.md#text-markdown-divergence) for whether
|
||||
> that's a feature or a foot-gun.
|
||||
|
||||
### `Image`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ | |
|
||||
| `altText` | `BoundValue<string>` | | |
|
||||
| `fit` | enum | | `contain` \| `cover` \| `fill` \| `none` \| `scale-down` |
|
||||
| `usageHint` | enum | | `icon` \| `avatar` \| `smallFeature` \| `mediumFeature` \| `largeFeature` \| `header` |
|
||||
|
||||
> WinUI: `Image`; `usageHint` maps to fixed pixel sizes (24/40/80/160/240/full).
|
||||
> Avatar wraps in `Border` w/ circular `CornerRadius`. SVG via
|
||||
> `SvgImageSource` w/ 8s timeout. URLs gated by `MediaResolver` allowlist +
|
||||
> DNS-rebinding defense.
|
||||
> Lit: `<img>` directly; **no URL filtering** — `data:` and other schemes
|
||||
> pass through.
|
||||
|
||||
### `Icon`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `name` | `BoundValue<string>` | ✓ |
|
||||
|
||||
The 48 supported icon names (canonical enum):
|
||||
|
||||
```
|
||||
accountCircle, add, arrowBack, arrowForward, attachFile, calendarToday,
|
||||
call, camera, check, close, delete, download, edit, event, error,
|
||||
favorite, favoriteOff, folder, help, home, info, locationOn, lock,
|
||||
lockOpen, mail, menu, moreVert, moreHoriz, notificationsOff,
|
||||
notifications, payment, person, phone, photo, print, refresh, search,
|
||||
send, settings, share, shoppingCart, star, starHalf, starOff, upload,
|
||||
visibility, visibilityOff, warning
|
||||
```
|
||||
|
||||
> WinUI: `FontIcon` over Segoe Fluent Icons (MDL2). Unknown names →
|
||||
> `help` glyph; `moreHoriz` reuses `moreVert` (no canonical horizontal
|
||||
> ellipsis in MDL2). Logs once per unmapped name per process.
|
||||
> Lit: CSS background-image sprite; converts camelCase names to snake_case
|
||||
> at lookup (icon.ts:53).
|
||||
|
||||
### `Video`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ |
|
||||
|
||||
> WinUI: `MediaPlayerElement` w/ transport controls. URL gated by
|
||||
> `MediaResolver` HTTPS+allowlist. **No DNS-rebinding pin** — the OS
|
||||
> media stack does its own DNS lookup at playback time, so the
|
||||
> hostname-allowlist is the load-bearing defense (image fetches use a
|
||||
> separate, safer path that does pin).
|
||||
> Lit: `<video controls>`.
|
||||
|
||||
### `AudioPlayer`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ |
|
||||
| `description` | `BoundValue<string>` | |
|
||||
|
||||
> WinUI: `MediaPlayerElement` w/ `description` rendered above as
|
||||
> `Caption`. URL gated by `MediaResolver` HTTPS+allowlist; same
|
||||
> playback-time DNS caveat as `Video`.
|
||||
> Lit: `<audio controls>`; **`description` is ignored** (audio.ts).
|
||||
|
||||
---
|
||||
|
||||
## Interactive
|
||||
|
||||
### `Button`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `child` | component-id | ✓ |
|
||||
| `action` | `Action` object | ✓ |
|
||||
| `primary` | bool | |
|
||||
|
||||
`Action` shape:
|
||||
```json
|
||||
{
|
||||
"name": "submit",
|
||||
"context": [
|
||||
{ "key": "email", "value": { "path": "/form/email" } },
|
||||
{ "key": "kind", "value": { "literalString": "primary" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
> WinUI: `Button`; `primary` → `AccentButtonStyle`; click raises
|
||||
> `A2UIAction` through the dispatcher (see
|
||||
> [`data-and-actions.md`](./data-and-actions.md#actions)).
|
||||
> Lit: `<button>`; click dispatches a DOM `CustomEvent`.
|
||||
|
||||
### `CheckBox`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | ✓ |
|
||||
| `value` | `BoundValue<bool>` | ✓ |
|
||||
|
||||
> Both impls: bi-directional binding — toggle writes back to the
|
||||
> `value.path` data-model location. Spec is silent on write-back.
|
||||
|
||||
### `TextField`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | ✓ | |
|
||||
| `text` | `BoundValue<string>` | | |
|
||||
| `textFieldType` | enum | | `shortText` (default) \| `longText` \| `number` \| `date` \| `obscured` |
|
||||
| `validationRegexp` | string | | |
|
||||
|
||||
> WinUI: `TextBox` (or `PasswordBox` if `obscured`); `obscured` paths
|
||||
> auto-marked as secrets. `InputScope` set per type. **`validationRegexp`
|
||||
> not enforced**.
|
||||
> Lit: `<input>` / `<textarea>`. **`validationRegexp` not enforced**
|
||||
> (root.ts:367 TODO).
|
||||
|
||||
### `DateTimeInput`
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `value` | `BoundValue<string>` | ✓ | ISO 8601 |
|
||||
| `enableDate` | bool | | |
|
||||
| `enableTime` | bool | | |
|
||||
|
||||
> WinUI: `CalendarDatePicker` + `TimePicker`; ISO-8601 round-trip.
|
||||
> Lit: `<input type="date|time|datetime-local">`. **`outputFormat`
|
||||
> noted in code but ignored** (datetime-input.ts:159 TODO).
|
||||
|
||||
### `MultipleChoice`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `selections` | `BoundValue<array>` (or `path`) | ✓ | |
|
||||
| `options[]` | array | ✓ | each: `{ label: BoundValue<string>, value: string }` |
|
||||
| `maxAllowedSelections` | integer | | |
|
||||
| `variant` | enum | | `checkbox` \| `chips` |
|
||||
| `filterable` | bool | | |
|
||||
|
||||
> WinUI: `ComboBox` (single) or `ListView` (multi). When
|
||||
> `maxAllowedSelections == 1` it writes a scalar to the path (not a
|
||||
> 1-element array) — back-compat reads tolerate either. **`variant` and
|
||||
> `filterable` not honored**.
|
||||
> Lit: `<select multiple>`. **`maxAllowedSelections` not enforced**
|
||||
> (root.ts:334 TODO); selections array resolution incomplete
|
||||
> (multiple-choice.ts:87–103).
|
||||
|
||||
### `Slider`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | |
|
||||
| `value` | `BoundValue<number>` | ✓ |
|
||||
| `minValue` | number | |
|
||||
| `maxValue` | number | |
|
||||
|
||||
> WinUI: `Slider`, defaults min=0/max=100/step=1. Bi-directional bind.
|
||||
> Lit: `<input type="range">`. Bi-directional bind.
|
||||
|
||||
---
|
||||
|
||||
## Catalog-strict mode
|
||||
|
||||
Both implementations must reject **anything not in the 18 above** by
|
||||
rendering a placeholder, not by throwing. This is one of the few
|
||||
"MUST" requirements in the spec:
|
||||
|
||||
> The full set of available component types and their properties is
|
||||
> defined by a Catalog Schema, not in the core protocol schema.
|
||||
|
||||
> WinUI: `UnknownRenderer` — orange-bordered placeholder w/ warning
|
||||
> icon and component name. Telemetry event fired.
|
||||
> Lit: walks a `componentRegistry`; allows custom components when
|
||||
> `enableCustomElements` flag is set (extension beyond spec).
|
||||
|
||||
## Catalog-level styles (theme tokens)
|
||||
|
||||
Each catalog optionally declares `styles`:
|
||||
|
||||
| Token | Type |
|
||||
| --- | --- |
|
||||
| `font` | string (font family) |
|
||||
| `primaryColor` | hex `#RRGGBB` |
|
||||
|
||||
> WinUI: `A2UITheme.Parse()` reads these plus nested
|
||||
> `colors.{accent,background,foreground,card}`,
|
||||
> `typography.fontFamily`, `radius`, `spacing`. Applied to the surface
|
||||
> Grid resource scope (not global).
|
||||
> Lit: derives a `--p-0` … `--p-100` palette via CSS `color-mix` from
|
||||
> `primaryColor`.
|
||||
190
docs/a2ui/data-and-actions.md
Normal file
190
docs/a2ui/data-and-actions.md
Normal file
@ -0,0 +1,190 @@
|
||||
# A2UI v0.8 — Data Binding & Actions
|
||||
|
||||
## A2UIValue
|
||||
|
||||
Almost every property on a component is an `A2UIValue` — a tagged union of
|
||||
literal types and a path into the data model.
|
||||
|
||||
```jsonc
|
||||
// All of these are valid:
|
||||
{ "literalString": "Hello" }
|
||||
{ "literalNumber": 42 }
|
||||
{ "literalBoolean": true }
|
||||
{ "literalArray": ["a", "b", "c"] } // array-of-string only
|
||||
{ "path": "/user/name" } // bind to data-model location
|
||||
|
||||
// "Implicit initialization" (literal + path together):
|
||||
{ "literalString": "default", "path": "/form/title" }
|
||||
// → on first resolve, the client writes "default" to /form/title,
|
||||
// then binds. After that it's a path binding.
|
||||
```
|
||||
|
||||
The spec does **not** enumerate `literalArray<number>` or `literalArray<bool>`
|
||||
— string arrays are the only explicit array literal in v0.8.
|
||||
|
||||
### Resolution at runtime
|
||||
|
||||
When a component renders or re-renders, each `A2UIValue` property is
|
||||
resolved:
|
||||
|
||||
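1. If only a literal is present → use it. (Casting is impl-defined; both
|
||||
impls coerce numbers ↔ strings as needed for display.)
|
||||
2. Else if `path` is present → look up the value in the surface's data
|
||||
model and use it. (If a literal accompanies the `path`, it is first written to that path as the initial value — the "implicit initialization" case above.)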
|
||||
3. Else → property is "unset" (component decides default behavior).
|
||||
|
||||
### Path syntax
|
||||
|
||||
Paths are JSON-pointer-_ish_ strings (`/foo/bar/0`). The spec doesn't
|
||||
formally cite RFC 6901; both impls treat them similarly but differ at
|
||||
edges:
|
||||
|
||||
- **WinUI**: strict RFC 6901 via `DataModelStore.SetByPointer` /
|
||||
`Read` (`src/OpenClaw.Tray.WinUI/A2UI/DataModel/DataModelStore.cs`).
|
||||
- **Lit**: relative paths supported (`.` = current `dataContextPath`,
|
||||
bare names resolve relative to context); auto-parses `valueString`
|
||||
fields that look like JSON (`vendor/a2ui/.../model-processor.ts:198–225`).
|
||||
This is convenient but can be surprising — a string `"[1,2]"` becomes
|
||||
an array.
|
||||
|
||||
## Data model
|
||||
|
||||
A surface's data model is a JSON tree. `dataModelUpdate` envelopes patch
|
||||
into this tree:
|
||||
|
||||
```jsonc
|
||||
{ "dataModelUpdate": {
|
||||
"surfaceId": "main",
|
||||
"path": "/user",
|
||||
"contents": [
|
||||
{ "key": "name", "valueString": "Ada" },
|
||||
{ "key": "age", "valueNumber": 36 },
|
||||
{ "key": "tags", "valueArray": [
|
||||
{ "valueString": "admin" }, { "valueString": "beta" }
|
||||
]},
|
||||
{ "key": "address","valueMap": [
|
||||
{ "key": "city", "valueString": "London" }
|
||||
]}
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
Behaviors **not nailed down by the spec** that matter in practice:
|
||||
|
||||
| Question | Lit | WinUI |
|
||||
| --- | --- | --- |
|
||||
| Replace vs. merge `valueMap`? | Merge per leaf | Merge per leaf (RFC 6901 set) |
|
||||
| Notification granularity? | Coalesced via Lit signals | Coalesced via subscription set |
|
||||
| Per-update size caps? | None | 1024 entries / update; 256-char keys; 64 KiB strings; 32-deep maps |
|
||||
|
||||
### Subscriptions
|
||||
|
||||
Components watch the model so they can re-render when the agent or another
|
||||
component writes:
|
||||
|
||||
- **Lit**: `@lit-labs/signals`; the root applies an `effect()` to the
|
||||
`childComponents` signal so the light-DOM tree re-renders when the
|
||||
signal fires (`vendor/a2ui/.../ui/root.ts:39, 85`).
|
||||
- **WinUI**: `DataModelObservable.Subscribe(path, callback)` returns
|
||||
`IDisposable`; renderers call `ctx.WatchValue(componentId, name, value, callback)`
|
||||
which installs a per-component subscription that's torn down when the
|
||||
component is recycled (`src/OpenClaw.Tray.WinUI/A2UI/Rendering/IComponentRenderer.cs`).
|
||||
|
||||
## Actions
|
||||
|
||||
A `Button.action` (and other action-bearing properties) declares
|
||||
**what to send to the agent**:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"name": "submit",
|
||||
"context": [
|
||||
{ "key": "email", "value": { "path": "/form/email" } },
|
||||
{ "key": "kind", "value": { "literalString": "primary" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
When the user clicks, the client must:
|
||||
|
||||
1. Resolve every `context[].value` against the data model right now.
|
||||
2. Build a `userAction` event:
|
||||
```jsonc
|
||||
{ "userAction": {
|
||||
"name": "submit",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn-1",
|
||||
"timestamp": "2026-04-27T17:05:00Z",
|
||||
"context": { "email": "ada@example.com", "kind": "primary" }
|
||||
}}
|
||||
```
|
||||
3. Send it back via A2A (the spec is explicit: **not** on the SSE/JSONL
|
||||
stream).
|
||||
|
||||
### What "context" should and shouldn't contain
|
||||
|
||||
The spec is silent on **scoping** — i.e., is it OK for a Button to
|
||||
declare `context: [{ key: "all", value: { path: "/" } }]` and exfiltrate
|
||||
the entire data model?
|
||||
|
||||
The two impls take very different positions here:
|
||||
|
||||
- **Lit**: passes `action` and `dataContextPath` straight through in a
|
||||
DOM `CustomEvent`. The host (canvas) is responsible for resolving and
|
||||
sanitizing — there's no defense at the renderer.
|
||||
- **WinUI**: `RenderContext.BuildActionContext()` (`IComponentRenderer.cs:183–249`)
|
||||
collects an **allowed-paths set** from either:
|
||||
- explicit `dataBinding: [ { path: "..." } ]` on the component, or
|
||||
- implicit walk over component properties' own `A2UIValue.path` values.
|
||||
|
||||
Each declared `context[].value.path` is then `IsAllowedPath`-filtered (exact
|
||||
match or ancestor with `/` boundary). Secret paths (registered or
|
||||
denylisted by substring) are excluded unless explicitly allowed.
|
||||
|
||||
This is one of the most consequential **good deviations** in the WinUI
|
||||
impl — see [`grading.md#action-context-scoping-the-centerpiece-win`](./grading.md#action-context-scoping-the-centerpiece-win).
|
||||
|
||||
### Transport
|
||||
|
||||
After context is built, both impls hand off to a transport:
|
||||
|
||||
- **Lit**: dispatches `StateEvent<"a2ui.action">` (CustomEvent, bubbling,
|
||||
composed). Listener wires up however the embedding app wants.
|
||||
- **WinUI**: `ActionDispatcher` (`src/OpenClaw.Tray.WinUI/A2UI/Actions/IActionSink.cs`):
|
||||
- **Debounces** by `surfaceId|sourceComponentId|name` (200 ms window).
|
||||
- **Single-flight gate** so a fallback dequeue can't race a fresh send.
|
||||
- **Fallback queue** when no transport is connected.
|
||||
- Tries each registered transport (`GatewayActionTransport`,
|
||||
`LoggingActionTransport`) until one delivers.
|
||||
|
||||
For the gateway path, `GatewayActionTransport`
|
||||
(`src/OpenClaw.Tray.WinUI/A2UI/Actions/GatewayActionTransport.cs`) emits
|
||||
an `agent.request` node event:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"message": "CANVAS_A2UI action=submit session=main surface=main component=btn-1 host=… instance=… ctx=… default=update_canvas",
|
||||
"sessionKey": "main",
|
||||
"thinking": "low",
|
||||
"deliver": false,
|
||||
"key": "<action-id>"
|
||||
}
|
||||
```
|
||||
|
||||
`AgentMessageFormatter` is a deliberate byte-for-byte port of the Android
|
||||
node's formatter — the gateway parses tags identically across platforms.
|
||||
|
||||
## Security boundaries
|
||||
|
||||
| Concern | Spec | Lit | WinUI |
|
||||
| --- | --- | --- | --- |
|
||||
| URL fetching for `Image`/`Video`/`AudioPlayer` | silent | unrestricted | HTTPS+allowlist for all three; DNS-rebinding pin only on `Image` fetches (`MediaResolver.cs`'s `SocketsHttpHandler.ConnectCallback`). `Video`/`AudioPlayer` hand the validated URI to `MediaSource.CreateFromUri`, which performs its own DNS at playback — allowlist is the load-bearing defense for media. |
|
||||
| Unknown component types | "render placeholder, don't crash" | placeholder for spec'd missing; **registers user-supplied custom elements** if a flag is set | strict 18-only `UnknownRenderer` placeholder |
|
||||
| Markdown / HTML in `Text` | spec says plain string | parses Markdown; HTML blocks rendered in `iframe sandbox=""`; code escaped | renders as plain string |
|
||||
| Action context leakage | underspecified | passthrough — host's problem | renderer-side path allowlist + secret denylist |
|
||||
| Bearer / token surfaces | n/a | n/a | MCP token shown in Settings UI w/ copy button (out-of-band) |
|
||||
| `canvas.navigate` | n/a (out of A2UI) | n/a | `HttpUrlValidator` gates URLs; user choice of "canvas" vs "browser" opener |
|
||||
|
||||
The "Spec is silent" rows are the spots where a reviewer should keep
|
||||
their guard up — anything Lit forwards to the embedding host can become
|
||||
a vulnerability if that host doesn't apply policy.
|
||||
355
docs/a2ui/grading.md
Normal file
355
docs/a2ui/grading.md
Normal file
@ -0,0 +1,355 @@
|
||||
# A2UI v0.8 — Implementation Grading
|
||||
|
||||
This grades two implementations against the v0.8 spec
|
||||
(<https://a2ui.org/specification/v0.8-a2ui/>):
|
||||
|
||||
- **Lit reference** at `openclaw\vendor\a2ui\renderers\lit\src\0.8`
|
||||
- **Native WinUI** in this repo at `src/OpenClaw.Tray.WinUI/A2UI/`
|
||||
|
||||
The Lit code looks like the canonical browser renderer the OpenClaw
|
||||
canvas host ships; the WinUI code is this repo's branch
|
||||
`feat/a2ui-native-winui`.
|
||||
|
||||
Citations use repo-local paths. Lit paths are anchored at the OpenClaw
|
||||
checkout: `openclaw\vendor\a2ui\renderers\lit\src\0.8\`. WinUI paths are
|
||||
anchored at `src/OpenClaw.Tray.WinUI/A2UI/`.
|
||||
|
||||
## Method
|
||||
|
||||
For each spec area, deductions land in two buckets:
|
||||
|
||||
- **Gap** — implementation is missing or wrong vs. spec. Letter grade penalty.
|
||||
- **Good deviation** — implementation does something the spec _doesn't say
|
||||
to do_, but it's the correct call. Listed but doesn't penalize.
|
||||
|
||||
Grades are A–F, separately for Lit and WinUI. There is no curving —
|
||||
"A" means it would pass a strict spec audit and a strict security
|
||||
audit; "B" means it works for normal traffic but fails under a hostile
|
||||
agent; etc.
|
||||
|
||||
---
|
||||
|
||||
## Scorecard
|
||||
|
||||
| Area | Lit | WinUI | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| Component coverage (catalog completeness) | A | A | both 18/18 |
|
||||
| Component property completeness | B | A− | Lit has 4 documented TODOs; WinUI has minor distribution mappings |
|
||||
| Streaming / JSONL parsing | B | A | Lit: lenient; WinUI: lenient + size caps |
|
||||
| Data binding / `A2UIValue` | B+ | A | Lit auto-parses JSON strings (surprising); WinUI strict RFC 6901 |
|
||||
| Action transport | B | A | Lit: DOM event passthrough; WinUI: debounced + single-flight + fallback queue + gateway tag protocol |
|
||||
| Action context security | D | A | Lit punts to host; WinUI scopes to declared `dataBinding` and redacts secrets |
|
||||
| Theming | A− | A− | Equivalent power; different idioms |
|
||||
| URL safety / SSRF | F | A− | Lit unrestricted; WinUI HTTPS+allowlist for `Image`/`Video`/`AudioPlayer`, plus DNS-rebinding pin on `Image` fetches only |
|
||||
| Modal lifecycle | A− | A | Both work; WinUI uses native `ContentDialog` |
|
||||
| List virtualization | C | A | Lit builds all items; WinUI uses `ItemsRepeater` w/ recycling |
|
||||
| Bi-directional binding (write-back) | A | A | Both implement; spec is silent (good deviation) |
|
||||
| Markdown in `Text` | B+ | n/a | Lit's enhancement is real but increases attack surface |
|
||||
| Test coverage | D | A− | Lit: 1 model test, no per-component; WinUI: render matrix + scale + integration |
|
||||
| Spec deviations called out (good ones) | B | A | Lit's improvements partially offset its gaps |
|
||||
| **Overall** | **B−** | **A−** | |
|
||||
|
||||
The two overall grades have very different shapes:
|
||||
|
||||
- **Lit** is a smaller codebase that gets the happy path right, with two
|
||||
notable **good** deviations (Markdown rendering, bi-directional binding)
|
||||
but several papercut **gaps** and a **non-trivial security delta**
|
||||
inherited from a "the host will sanitize" posture.
|
||||
- **WinUI** is significantly more code, fills almost every gap, and adds
|
||||
defenses the spec doesn't ask for. Its remaining minus comes from the
|
||||
things it _doesn't_ do yet (List `template` mode, Row wrap, `MultipleChoice.variant`).
|
||||
|
||||
---
|
||||
|
||||
## Lit implementation — detailed deductions
|
||||
|
||||
### Documented `TODO` gaps
|
||||
|
||||
Verbatim TODOs in `vendor/a2ui/.../ui/root.ts` and component files:
|
||||
|
||||
| Property | File:Line | Status |
|
||||
| --- | --- | --- |
|
||||
| `Divider.thickness` / `axis` / `color` | `ui/root.ts:317` | type declared, value not applied to `<hr>` |
|
||||
| `MultipleChoice.maxAllowedSelections` | `ui/root.ts:334` | accepted but not enforced |
|
||||
| `TextField.validationRegexp` | `ui/root.ts:367` | not applied to `<input>` |
|
||||
| `DateTimeInput.outputFormat` | `ui/datetime-input.ts:159` | placeholder; always uses browser format |
|
||||
| `MultipleChoice.selections` resolution | `ui/multiple-choice.ts:87–103` | logic incomplete when `selections` is path-bound |
|
||||
| `AudioPlayer.description` | `ui/audio.ts` | spec'd property silently dropped |
|
||||
|
||||
Letter penalty: **−1 step on Component Property Completeness** (A → B).
|
||||
|
||||
### `A2UIValue.path` resolver auto-parses JSON-shaped strings
|
||||
|
||||
`data/model-processor.ts:198–225` detects `valueString` payloads that look
|
||||
like `{...}` or `[...]` and **silently parses them as JSON**. The intent is
|
||||
"developer convenience"; the consequence is that a string literal containing
|
||||
a `[` or `{` becomes a structured value. This is a **gap** because the spec
|
||||
distinguishes `valueString` from `valueArray`/`valueMap` precisely so the
|
||||
agent can be unambiguous. Letter penalty: **−1 step on data binding**.
|
||||
|
||||
### URLs are passed through to the DOM
|
||||
|
||||
`ui/image.ts:67–74` binds `<img src="${url}">` directly. There is no
|
||||
allowlist for `data:` / `javascript:` / `file:` / private-IP hosts, no
|
||||
SSRF protection, no DNS rebinding defense. The WinUI impl has all of
|
||||
these. The host **may** sanitize before forwarding URLs, but the
|
||||
renderer offers no defense in depth. Letter penalty: **−2 steps on URL
|
||||
safety** (this is the F).
|
||||
|
||||
### Component registry allows arbitrary custom elements
|
||||
|
||||
`vendor/a2ui/.../ui/root.ts:118–140, 441–471` lets the embedding app set
|
||||
`enableCustomElements = true` and then renders any `<component>` whose tag
|
||||
is registered in `componentRegistry`. This is **beyond spec** — useful for
|
||||
extensibility, dangerous for catalog-strict mode. **Not graded as a gap**
|
||||
since it's behind a flag, but it's worth flagging at the host level.
|
||||
|
||||
### One unit test covers everything
|
||||
|
||||
`vendor/a2ui/.../model.test.ts` exercises `A2uiMessageProcessor` for
|
||||
`beginRendering` and `surfaceUpdate`. There are **no per-component
|
||||
render tests, no event-dispatch tests, no markdown sanitizer tests, no
|
||||
data-binding edge-case tests**. Letter penalty: **−2 steps on test
|
||||
coverage** (D).
|
||||
|
||||
### Good deviations
|
||||
|
||||
- **Markdown rendering** in `Text` (`ui/text.ts`, `ui/directives/markdown.ts`).
|
||||
HTML blocks wrapped in `<iframe sandbox="">`; code blocks escaped via
|
||||
`sanitizer.escapeNodeText`. The spec says plain string. Whether this
|
||||
counts as good depends on the threat model — see
|
||||
[the Text/Markdown divergence](#text-markdown-divergence).
|
||||
- **Signal-driven re-render** via `@lit-labs/signals`. Cleaner reactivity
|
||||
than naive `requestUpdate()`.
|
||||
- **Bi-directional binding** in `CheckBox`, `TextField`, `Slider`,
|
||||
`DateTimeInput`. Spec is silent on write-back; both impls add it.
|
||||
|
||||
---
|
||||
|
||||
## WinUI implementation — detailed deductions
|
||||
|
||||
### Property-coverage misses
|
||||
|
||||
| Property | File:Line | Status |
|
||||
| --- | --- | --- |
|
||||
| `Row.distribution` `spaceBetween/Around/Evenly` | `Rendering/Renderers/ContainerRenderers.cs:10–32` | all three collapse to `HorizontalAlignment.Stretch` (WinUI `StackPanel` doesn't natively express justify-content) |
|
||||
| `Row.wrap` (multi-row) | n/a | not implemented; would need a custom `Panel` |
|
||||
| `List.template` mode | `Rendering/Renderers/ContainerRenderers.cs:57–159` | only `explicitList` supported |
|
||||
| `MultipleChoice.variant` (`chips`) | `Rendering/Renderers/InteractiveRenderers.cs:279–430` | always `ComboBox`/`ListView` |
|
||||
| `MultipleChoice.filterable` | same | not honored |
|
||||
| `TextField.validationRegexp` | `Rendering/Renderers/InteractiveRenderers.cs:98–199` | not enforced |
|
||||
| `Tabs` close / reorder | `Rendering/Renderers/ContainerRenderers.cs:187–235` | disabled |
|
||||
| Component `weight` | `Protocol/A2UIProtocol.cs:111–151` | parsed but not applied |
|
||||
|
||||
Letter penalty: **−1 step on Component Property Completeness**, but
|
||||
balanced by being the only impl that fills the corresponding Lit gaps
|
||||
(`maxAllowedSelections` is enforced; `Divider.axis` is honored).
|
||||
|
||||
### Action context scoping (the centerpiece win)
|
||||
|
||||
`Rendering/IComponentRenderer.cs:183–249` (`BuildActionContext`):
|
||||
|
||||
1. Collect `allowed` paths from the component's explicit `dataBinding`
|
||||
array, or — if absent — implicitly walk every `A2UIValue.path` referenced
|
||||
by the component's own properties.
|
||||
2. For each `action.context[]` entry, resolve only if `IsAllowedPath`
|
||||
matches (exact or ancestor with `/` boundary).
|
||||
3. Strip secret paths via `SecretRedactor` (`Rendering/SecretRedactor.cs`):
|
||||
- Registered paths (e.g., obscured `TextField` fields).
|
||||
- Substring denylist: `password`, `secret`, `token`.
|
||||
|
||||
This blocks the trivial "exfiltrate the whole tree" attack without
|
||||
requiring the host to know about A2UI internals. The Lit impl can't
|
||||
do this because it dispatches `action` straight through.
|
||||
|
||||
### URL safety — DNS rebinding defense (Image fetches)
|
||||
|
||||
`Rendering/MediaResolver.cs:57–95`:
|
||||
|
||||
```csharp
|
||||
new SocketsHttpHandler {
|
||||
ConnectCallback = async (ctx, ct) => {
|
||||
var addresses = await Dns.GetHostAddressesAsync(ctx.DnsEndPoint.Host, ct);
|
||||
foreach (var ip in addresses) {
|
||||
if (!IsPublicAddress(ip)) throw ...; // loopback, RFC1918, link-local, multicast
|
||||
}
|
||||
// connect to resolved IP, not hostname (no second DNS lookup)
|
||||
},
|
||||
PooledConnectionLifetime = TimeSpan.FromMinutes(2),
|
||||
};
|
||||
```
|
||||
|
||||
Plus an allowlist gate in `IsAllowed(url)`. Closes a TOCTOU window
|
||||
between an allowlist check and the actual TCP connect. The Lit impl
|
||||
does none of this.
|
||||
|
||||
**Limitation: this pin is image-only.** `Video`/`AudioPlayer` route through
|
||||
`MediaSource.CreateFromUri`, which performs its own DNS resolution at
|
||||
playback time outside the resolver. The HTTPS+allowlist gate still
|
||||
applies to those URLs, but the connect-time IP check does not — see
|
||||
`MediaResolver.TryResolveMediaUri`. A local-proxy approach was scoped
|
||||
out of the v0.8 native renderer; the allowlist is the load-bearing
|
||||
defense for media playback.
|
||||
|
||||
### Streaming hardening
|
||||
|
||||
`Protocol/A2UIProtocol.cs:176–367` and `Hosting/A2UIRouter.cs`:
|
||||
|
||||
| Cap | Value |
|
||||
| --- | --- |
|
||||
| Max line length | 1 MiB |
|
||||
| Max components per surface | 2000 |
|
||||
| Max entries per `dataModelUpdate` | 1024 |
|
||||
| Max key length | 256 |
|
||||
| Max string value | 64 KiB |
|
||||
| Max `valueMap` depth | 32 |
|
||||
| Max render depth | 64 |
|
||||
|
||||
All limits log + drop, never throw. Cycle detection in `_renderingIds`
|
||||
prevents id-loops in malformed surfaces.
|
||||
|
||||
### Component diff on `surfaceUpdate`
|
||||
|
||||
`Hosting/SurfaceHost.cs:ApplyComponents` compares incoming defs (name,
|
||||
weight, properties JSON-string) against the previous set and **skips
|
||||
rebuild if unchanged**. Effect: a re-emitted surface preserves
|
||||
`TextBox` caret position, scroll offset, and `Tabs` selection. The
|
||||
spec calls for "structural diffing"; this is a heuristic that catches
|
||||
the most common case (agent re-emits whole surface).
|
||||
|
||||
### Modal as native `ContentDialog`
|
||||
|
||||
`Rendering/Renderers/ContainerRenderers.cs:237–284` wires up a
|
||||
`ContentDialog` whose `Content` is the `contentChild` and whose trigger
|
||||
is the `entryPointChild` wrapped in a transparent `Button`. Spec leaves
|
||||
the modal _shape_ open; the WinUI impl gives it the full platform-modal
|
||||
treatment (focus trap, ESC dismiss, screen-reader announcement).
|
||||
|
||||
### List virtualization
|
||||
|
||||
`Rendering/Renderers/ContainerRenderers.cs:57–159` uses an
|
||||
`ItemsRepeater` with a `ChildIdTemplate` cache keyed by component id.
|
||||
Recycled elements are pulled from the cache so their data-binding
|
||||
subscriptions stay alive across scrolling. The Lit impl has no
|
||||
virtualization.
|
||||
|
||||
### Test surface
|
||||
|
||||
| Project | Focus |
|
||||
| --- | --- |
|
||||
| `OpenClaw.Shared.Tests/A2UICapabilitySecurityTests.cs` | protocol, secret redaction |
|
||||
| `OpenClaw.Tray.UITests/A2UIRenderingTests.cs` | per-component XAML rendering, data binding, live updates |
|
||||
| `OpenClaw.Tray.UITests/A2UIControlMatrixTests.cs` | property matrix coverage |
|
||||
| `OpenClaw.Tray.UITests/A2UIDashboardScaleTest.cs` | 1000+ component stress |
|
||||
| `OpenClaw.Tray.UITests/A2UIThemeTests.cs` | theme parsing |
|
||||
| `OpenClaw.Tray.UITests/A2UISvgTests.cs` | SVG decode + 8s timeout |
|
||||
| `OpenClaw.Tray.IntegrationTests/A2UICanvasIntegrationTests.cs` | end-to-end MCP smoke + PNG capture |
|
||||
|
||||
Coverage merged across all three suites via `dotnet-coverage` (per the
|
||||
auto-memory note). Letter grade A−; the missing step is that the
|
||||
gateway-action transport unit tests aren't fully isolated (depend on a
|
||||
fake `WindowsNodeClient`).
|
||||
|
||||
### Good deviations
|
||||
|
||||
| Deviation | File | Why it's good |
|
||||
| --- | --- | --- |
|
||||
| DNS rebinding defense (image fetches) | `Rendering/MediaResolver.cs:57–95` | spec doesn't ask but a hostile agent can otherwise pivot through the image fetch path to internal HTTP services. Does not extend to `Video`/`AudioPlayer` — see "URL safety" section. |
|
||||
| Action context allowlist | `Rendering/IComponentRenderer.cs:183–249` | minimum-information principle; spec leaves this open |
|
||||
| Secret denylist | `Rendering/SecretRedactor.cs` | catches `/auth/sessionToken` style names automatically |
|
||||
| `surfaceUpdate` diff | `Hosting/SurfaceHost.cs` | preserves caret/scroll/selection on re-emit |
|
||||
| Single-flight gate on action dispatch | `Actions/IActionSink.cs:27–142` | prevents fallback dequeue racing fresh send |
|
||||
| Per-surface theme scope | `Hosting/SurfaceHost.cs ApplyThemeToScope` | multi-surface tab views don't bleed themes |
|
||||
| `IA2UITelemetry` seam | `Telemetry/IA2UITelemetry.cs` | structured events instead of log scraping |
|
||||
| Single-handler `Func` events on `CanvasCapability` | reviewed in commit `5b9c468` | catches accidental multi-subscribe instead of silent `Delegate.Combine` |
|
||||
| MCP bearer token in Settings UI | `SettingsPage.xaml.cs` | quality-of-life for MCP setup, kept out of action payloads |
|
||||
|
||||
---
|
||||
|
||||
## Side-by-side: where they diverge meaningfully
|
||||
|
||||
### `Text` / Markdown divergence
|
||||
|
||||
The Lit impl renders Markdown; the WinUI impl renders plain text. This is
|
||||
the **biggest functional UX difference** between the two.
|
||||
|
||||
Lit's defense is `iframe sandbox=""` for HTML blocks plus
|
||||
`escapeNodeText` for code. That's a reasonable sandbox model in the
|
||||
browser — but every line still expands the renderer's attack surface
|
||||
beyond the spec's "plain string" promise.
|
||||
|
||||
For ms-windows-node, parity is **probably not worth chasing** unless
|
||||
the agent surfaces depend on it: WinUI doesn't have a built-in
|
||||
Markdown engine, and adding one means importing a dependency that has
|
||||
to be kept in lockstep with Lit's rendering choices to avoid surfaces
|
||||
that look right in the browser and broken on Windows. The defensible
|
||||
choice is to ask the agent to emit explicit `Text + usageHint`
|
||||
hierarchies instead of inline Markdown.
|
||||
|
||||
### List performance
|
||||
|
||||
If a surface includes a `List` of 200+ items, the Lit renderer will
|
||||
build all 200 children before paint. WinUI builds ~10 (whatever fits
|
||||
the viewport) and recycles as the user scrolls. For this repo's
|
||||
typical agent surfaces (dashboards, conversation panels) this is the
|
||||
single biggest performance delta.
|
||||
|
||||
### Action security model
|
||||
|
||||
The two impls have completely different threat models:
|
||||
|
||||
- **Lit + browser canvas host**: assume the embedding app is
|
||||
trustworthy and will sanitize. The renderer is a thin presenter.
|
||||
- **WinUI tray**: assume the renderer talks to a hostile agent over an
|
||||
arbitrary network. Apply policy in the renderer.
|
||||
|
||||
Neither is wrong, but a host that wants WinUI-grade isolation has to
|
||||
build the same allowlist/denylist logic that WinUI bakes in. In
|
||||
practice that means anyone embedding the Lit renderer outside
|
||||
OpenClaw's canvas host needs to **wrap action handlers**, never just
|
||||
forward them.
|
||||
|
||||
---
|
||||
|
||||
## Known deviations by category
|
||||
|
||||
For PR reviewers — quick "is this OK?" reference.
|
||||
|
||||
| Deviation | Spec status | Lit | WinUI | Verdict |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| Bi-directional data-model write on user input | silent | ✓ | ✓ | Good — spec assumes it implicitly |
|
||||
| Markdown in `Text` | violation (plain string) | ✓ | ✗ | Lit: useful but expands attack surface; WinUI: stay plain |
|
||||
| Custom-element registry beyond catalog | violation (catalog-strict) | ✓ (flag) | ✗ | Risk; only enable in trusted hosts |
|
||||
| `valueString` auto-parsed as JSON | violation (type erasure) | ✓ | ✗ | Bug-shaped; rely on `valueMap`/`valueArray` |
|
||||
| Hard size caps on stream / model | silent | ✗ | ✓ | Good — DoS defense |
|
||||
| URL allowlist on media | silent | ✗ | ✓ | Good — SSRF defense |
|
||||
| DNS-rebinding defense (image fetches) | silent | ✗ | ✓ | Good — beyond allowlist. Image only; `Video`/`AudioPlayer` rely on the allowlist alone (OS media stack re-resolves at playback). |
|
||||
| Action context allowlist | silent | ✗ | ✓ | Good — minimum information |
|
||||
| Secret-path redaction | silent | ✗ | ✓ | Good — keeps tokens off the wire |
|
||||
| Component diff on `surfaceUpdate` | "structural diffing" (vague) | ✗ | ✓ | Good — preserves UI state |
|
||||
| `List` virtualization | "should virtualize" | ✗ | ✓ | Good — required for non-trivial surfaces |
|
||||
| `Modal` as native `ContentDialog` | shape open | `<dialog>` | `ContentDialog` | Both fine |
|
||||
| `MultipleChoice` single-mode writes scalar | spec implies array | array | scalar | WinUI's reads tolerate either; talk to your agent format |
|
||||
| `validationRegexp` (TextField) | spec property | ✗ TODO | ✗ | Both have a gap here |
|
||||
|
||||
---
|
||||
|
||||
## Recommended follow-ups (not part of grading)
|
||||
|
||||
These are the changes that would close the remaining minuses:
|
||||
|
||||
**WinUI (A− → A)**
|
||||
- Honor `MultipleChoice.variant` (`chips`) and `filterable`.
|
||||
- Apply `TextField.validationRegexp` (the catalog says it's a string;
|
||||
compile + on-change validate).
|
||||
- Consider `List.template` mode for surfaces that bind a list to a
|
||||
data-model array (also unblocks v0.9 readiness).
|
||||
- Add unit tests for `GatewayActionTransport` payload shape.
|
||||
|
||||
**Lit (B− → B+ or higher)**
|
||||
- Resolve the four documented `TODO`s (Divider, TextField,
|
||||
DateTimeInput, MultipleChoice).
|
||||
- Add per-component render tests and a markdown-sanitizer test suite.
|
||||
- Add at least an opt-in URL allowlist for media components.
|
||||
- Document the `enableCustomElements` flag's risk surface for
|
||||
embedding apps.
|
||||
173
docs/a2ui/protocol.md
Normal file
173
docs/a2ui/protocol.md
Normal file
@ -0,0 +1,173 @@
|
||||
# A2UI v0.8 — Protocol
|
||||
|
||||
This is a faithful summary of the v0.8 wire format, distilled from
|
||||
<https://a2ui.org/specification/v0.8-a2ui/> and
|
||||
<https://a2ui.org/specification/v0.8-a2a-extension/>.
|
||||
|
||||
## 1. Architecture
|
||||
|
||||
A2UI is a **streaming, declarative UI protocol** for LLM-generated
|
||||
interfaces:
|
||||
|
||||
- **Server → client**: a JSONL stream (typically over SSE, but the protocol
|
||||
is transport-agnostic) carrying UI updates.
|
||||
- **Client → server**: A2A messages reporting user events.
|
||||
- **Surfaces**: independently-controllable UI regions, addressed by
|
||||
`surfaceId`. A single agent stream can manage many surfaces in parallel.
|
||||
|
||||
The component model is an **adjacency list** — a flat dictionary of
|
||||
`id → component`, with parents referencing children by id. This is easier
|
||||
for an LLM to emit incrementally than nested trees and is the foundation of
|
||||
progressive rendering.
|
||||
|
||||
## 2. Server → client envelopes
|
||||
|
||||
Each JSONL line is a JSON object containing **exactly one** of these keys:
|
||||
|
||||
| Key | Purpose |
|
||||
| --- | --- |
|
||||
| `surfaceUpdate` | Add or replace components in a surface's adjacency list |
|
||||
| `dataModelUpdate` | Mutate the surface's data model |
|
||||
| `beginRendering` | Signal "ready to render"; specify `root` and chosen catalog |
|
||||
| `deleteSurface` | Tear down a surface |
|
||||
|
||||
### 2.1 `surfaceUpdate`
|
||||
|
||||
```json
|
||||
{ "surfaceUpdate": {
|
||||
"surfaceId": "main",
|
||||
"components": [
|
||||
{ "id": "btn-1",
|
||||
"component": { "Button": { "child": "lbl-1", "action": { ... } } } }
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
Each entry has `id`, exactly one `component.{TypeName}` object, and an
|
||||
optional `weight` (used when the parent applies weighted distribution; not
|
||||
all parents honor it). The component definition is **catalog-validated**:
|
||||
unknown types fall back to a placeholder (clients MUST NOT crash on unknown
|
||||
types).
|
||||
|
||||
### 2.2 `dataModelUpdate`
|
||||
|
||||
```json
|
||||
{ "dataModelUpdate": {
|
||||
"surfaceId": "main",
|
||||
"path": "/optional/base",
|
||||
"contents": [
|
||||
{ "key": "name", "valueString": "Ada" },
|
||||
{ "key": "age", "valueNumber": 36 },
|
||||
{ "key": "active", "valueBoolean": true },
|
||||
{ "key": "address","valueMap": [ { "key": "city", "valueString": "London" } ] }
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
The `contents` array is a **typed key-value list** — `valueString`,
|
||||
`valueNumber`, `valueBoolean`, `valueMap`, `valueArray`. Updates are merged
|
||||
into the surface's data model rooted at `path` (default `/`). The spec
|
||||
leaves "merge vs replace" semantics underspecified; in practice both
|
||||
reference clients overwrite leaves and recurse into maps.
|
||||
|
||||
A special idiom — `path: "/x", contents: [{ "key": ".", "valueString": "v" }]`
|
||||
— is used to set a primitive at a non-root path.
|
||||
|
||||
### 2.3 `beginRendering`
|
||||
|
||||
```json
|
||||
{ "beginRendering": {
|
||||
"surfaceId": "main",
|
||||
"catalogId": "https://a2ui.org/specification/v0_8/standard_catalog_definition.json",
|
||||
"root": "card-1"
|
||||
}}
|
||||
```
|
||||
|
||||
Acts as a **synchronization gate**: until the client sees this, it should
|
||||
buffer components/data without rendering. `catalogId` is optional —
|
||||
default is the v0.8 standard catalog. `styles` may also appear here for
|
||||
per-surface theme tokens.
|
||||
|
||||
### 2.4 `deleteSurface`
|
||||
|
||||
```json
|
||||
{ "deleteSurface": { "surfaceId": "main" } }
|
||||
```
|
||||
|
||||
Disposes the surface, its data model, and any subscriptions.
|
||||
|
||||
## 3. Client → server events
|
||||
|
||||
### 3.1 `userAction`
|
||||
|
||||
```json
|
||||
{ "userAction": {
|
||||
"name": "submit",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn-1",
|
||||
"timestamp": "2026-04-27T17:05:00Z",
|
||||
"context": { "email": "ada@example.com" }
|
||||
}}
|
||||
```
|
||||
|
||||
`context` is the **resolved** snapshot of the action's `context[]`
|
||||
(BoundValues evaluated against the data model at click time — see
|
||||
[`data-and-actions.md`](./data-and-actions.md)).
|
||||
|
||||
### 3.2 `error`
|
||||
|
||||
A client-side error reporting envelope. The spec leaves the body shape
|
||||
underspecified.
|
||||
|
||||
## 4. A2A extension (v0.8)
|
||||
|
||||
A2UI rides on **A2A** as a typed extension:
|
||||
|
||||
- Extension URI: `https://a2ui.org/a2a-extension/a2ui/v0.8`
|
||||
- Messages are A2A `DataPart` objects with `mimeType: "application/json+a2ui"`.
|
||||
- Capability negotiation:
|
||||
- **Agent advertises** in `AgentCapabilities.extensions`:
|
||||
- `supportedCatalogIds: string[]`
|
||||
- `acceptsInlineCatalogs: bool`
|
||||
- **Client declares** support via transport-specific signaling
|
||||
(`X-A2A-Extensions` HTTP header, gRPC metadata, JSON-RPC mechanism).
|
||||
- Client may include in A2A message metadata:
|
||||
```json
|
||||
{ "metadata": { "a2uiClientCapabilities": {
|
||||
"supportedCatalogIds": [ "https://a2ui.org/.../standard_catalog_definition.json" ],
|
||||
"inlineCatalogs": [ { "catalogId": "...", "components": {...}, "styles": {...} } ]
|
||||
}}}
|
||||
```
|
||||
- Server picks one in the next `beginRendering`.
|
||||
|
||||
The available spec text is partial — push/pull operations, retry,
|
||||
backpressure, and authentication details are **delegated to the A2A layer**
|
||||
or to implementations.
|
||||
|
||||
## 5. Lifecycle
|
||||
|
||||
1. Client opens an A2A session and announces capabilities.
|
||||
2. Server starts a JSONL stream:
|
||||
1. Emits `surfaceUpdate` and `dataModelUpdate` lines (any order).
|
||||
2. Emits `beginRendering` once the surface is render-ready.
|
||||
3. Client renders the tree rooted at `root`.
|
||||
4. User interacts → client emits `userAction` (A2A message, **not** on the
|
||||
JSONL stream).
|
||||
5. Server responds with more JSONL.
|
||||
6. Server emits `deleteSurface` when done, or session ends.
|
||||
|
||||
## 6. Implementation notes (deltas from raw spec)
|
||||
|
||||
These behaviors are spec-silent or under-specified; both reference
|
||||
implementations and this repo make pragmatic choices:
|
||||
|
||||
- **Line-delimited JSON parsing** must tolerate malformed lines gracefully —
|
||||
a single bad line MUST NOT abort the stream. Both impls log + skip.
|
||||
- **Size caps** on lines, components per surface, data-model entries.
|
||||
WinUI applies hard caps (1 MiB / 2000 / 1024); Lit does not.
|
||||
- **Modal lifecycle**: spec defines `entryPointChild` + `contentChild` but
|
||||
not _when_ the modal is open. Lit uses `<dialog>.showModal()` driven by
|
||||
internal state; WinUI uses a `ContentDialog` triggered by entry click.
|
||||
- **Streaming partial components**: a `surfaceUpdate` may reference an
|
||||
`id` whose contents arrive on a later line. Clients MUST defer rendering
|
||||
of undefined refs, not throw.
|
||||
461
docs/gateway-node-integration.md
Normal file
461
docs/gateway-node-integration.md
Normal file
@ -0,0 +1,461 @@
|
||||
# OpenClaw Gateway ↔ Windows Node Integration Guide
|
||||
|
||||
> Last updated: 2026-04-26
|
||||
> Source of truth: [`openclaw/openclaw` — `src/gateway/node-command-policy.ts`](https://github.com/openclaw/openclaw/blob/main/src/gateway/node-command-policy.ts)
|
||||
|
||||
This document captures everything we've learned about how the OpenClaw gateway handles node commands, platform allowlists, and the QR bootstrap pairing flow. It exists because these details are not obvious from the docs alone and caused real debugging sessions.
|
||||
|
||||
---
|
||||
|
||||
## 1. The Gateway Command Allowlist System
|
||||
|
||||
Every command a node sends must pass **two** gates before it works:
|
||||
|
||||
1. **The node must declare it** — in the `commands` array of the `connect` handshake
|
||||
2. **The gateway must allow it** — via a per-platform allowlist in `node-command-policy.ts`
|
||||
|
||||
If either gate fails, the command is silently dropped or rejected with:
|
||||
```
|
||||
node command not allowed: "X" is not in the allowlist for platform "Y"
|
||||
```
|
||||
|
||||
### 1.1 Per-Platform Default Allowlists
|
||||
|
||||
The gateway has hardcoded defaults per platform (from `PLATFORM_DEFAULTS`):
|
||||
|
||||
| Platform | Default Commands |
|
||||
|----------|-----------------|
|
||||
| **macOS** | canvas.*, camera.list, location.get, device.info/status, contacts.search, calendar.events, reminders.list, photos.latest, motion.*, system.run/which/notify, screen.snapshot, browser.proxy |
|
||||
| **iOS** | canvas.*, camera.list, location.get, device.info/status, contacts.*, calendar.*, reminders.*, photos.latest, motion.*, system.notify |
|
||||
| **Android** | canvas.*, camera.list, location.get, notifications.*, device.*, contacts.*, calendar.*, callLog.search, reminders.*, photos.latest, motion.*, system.notify |
|
||||
| **Windows** | **system.run, system.run.prepare, system.which, system.notify, browser.proxy** |
|
||||
| **Linux** | system.run, system.run.prepare, system.which, system.notify, browser.proxy |
|
||||
| **Unknown** | canvas.*, camera.list, location.get, system.notify |
|
||||
|
||||
**Windows and Linux get almost nothing by default** — only the system commands and `browser.proxy`. No canvas, no camera, no screen, no location. This is because Windows/Linux were originally designed as headless "node host" platforms (exec-only), not full companion apps like macOS/iOS.
|
||||
|
||||
### 1.2 "Dangerous" Commands (Always Need Explicit Opt-In)
|
||||
|
||||
These commands are **never** in any platform's defaults, regardless of platform:
|
||||
|
||||
```typescript
|
||||
CAMERA_DANGEROUS_COMMANDS = ["camera.snap", "camera.clip"]
|
||||
SCREEN_DANGEROUS_COMMANDS = ["screen.record"]
|
||||
CONTACTS_DANGEROUS_COMMANDS = ["contacts.add"]
|
||||
CALENDAR_DANGEROUS_COMMANDS = ["calendar.add"]
|
||||
REMINDERS_DANGEROUS_COMMANDS = ["reminders.add"]
|
||||
SMS_DANGEROUS_COMMANDS = ["sms.send", "sms.search"]
|
||||
```
|
||||
|
||||
Even macOS doesn't get `camera.snap` or `camera.clip` by default! They must be added via `gateway.nodes.allowCommands`.
|
||||
|
||||
### 1.3 How to Enable Commands for Windows
|
||||
|
||||
Add ALL needed commands to `gateway.nodes.allowCommands` in `~/.openclaw/openclaw.json`:
|
||||
|
||||
```json5
|
||||
{
|
||||
gateway: {
|
||||
nodes: {
|
||||
allowCommands: [
|
||||
// Canvas
|
||||
"canvas.present",
|
||||
"canvas.hide",
|
||||
"canvas.navigate",
|
||||
"canvas.eval",
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
// Camera (all are dangerous or not in Windows defaults)
|
||||
"camera.list",
|
||||
"camera.snap",
|
||||
"camera.clip",
|
||||
// Screen
|
||||
"screen.snapshot",
|
||||
"screen.record",
|
||||
// Location
|
||||
"location.get",
|
||||
// Device metadata/status
|
||||
"device.info",
|
||||
"device.status",
|
||||
// Text-to-speech playback (enable only when agent-driven audio is desired)
|
||||
"tts.speak",
|
||||
// System (already in Windows defaults, but listed for completeness)
|
||||
// "system.run",
|
||||
// "system.run.prepare",
|
||||
// "system.which",
|
||||
// "system.notify",
|
||||
// Exec approvals
|
||||
"system.execApprovals.get",
|
||||
"system.execApprovals.set",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
After changing config:
|
||||
```bash
|
||||
openclaw gateway restart
|
||||
```
|
||||
|
||||
After changing the node's command list (code change), you must **re-pair**:
|
||||
```bash
|
||||
openclaw devices list # find old device
|
||||
openclaw devices reject <id> # reject the old pairing
|
||||
# Node will auto-reconnect and create a new pairing request
|
||||
openclaw devices list # find new request
|
||||
openclaw devices approve <id> # approve with updated commands
|
||||
```
|
||||
|
||||
### 1.4 Why Re-Pairing is Needed
|
||||
|
||||
The gateway snapshots the node's declared `commands` array at **pairing approval time**. If you change the node's code to add new commands and restart it, the gateway still uses the old snapshot. You must reject the old pairing and approve a new one.
|
||||
|
||||
### 1.5 `denyCommands`
|
||||
|
||||
You can also explicitly deny commands:
|
||||
```json5
|
||||
{ gateway: { nodes: { denyCommands: ["system.run"] } } }
|
||||
```
|
||||
`denyCommands` wins over `allowCommands`.
|
||||
|
||||
---
|
||||
|
||||
## 2. Command Name Mismatches (Bugs We Found)
|
||||
|
||||
### 2.1 `screen.capture` → Should Be `screen.snapshot`
|
||||
|
||||
The Windows node previously registered `screen.capture` as a command name. The gateway calls it **`screen.snapshot`**:
|
||||
|
||||
```typescript
|
||||
// Gateway source (node-command-policy.ts)
|
||||
const SCREEN_COMMANDS = ["screen.snapshot"];
|
||||
```
|
||||
|
||||
The macOS node uses `screen.snapshot`. `screen.capture` is not recognized by the gateway at all — it's silently filtered out of the declared commands.
|
||||
|
||||
**Fixed locally**: `ScreenCapability.cs` now advertises and handles `screen.snapshot`.
|
||||
|
||||
### 2.2 `screen.list` — Not a Gateway Command
|
||||
|
||||
Our node previously registered `screen.list`. This command does not exist in the gateway's command policy. It's never in any default allowlist.
|
||||
|
||||
**Fixed locally**: `screen.list` is no longer advertised.
|
||||
|
||||
### 2.3 `screen.record.start` / `screen.record.stop` — Not Mac/Gateway Commands
|
||||
|
||||
PR #159 originally explored session-based start/stop recording commands, but the current Mac node and gateway command surface only define fixed-duration `screen.record`.
|
||||
|
||||
**Fixed locally**: Windows now implements only fixed-duration `screen.record`; `screen.record.start` and `screen.record.stop` are intentionally not advertised.
|
||||
|
||||
### 2.4 Verified Correct Names
|
||||
|
||||
| Our Command | Gateway Canonical | Status |
|
||||
|-------------|-------------------|--------|
|
||||
| `camera.list` | `camera.list` | ✅ Match |
|
||||
| `camera.snap` | `camera.snap` | ✅ Match (dangerous) |
|
||||
| `camera.clip` | `camera.clip` | ✅ Match (dangerous) |
|
||||
| `screen.snapshot` | `screen.snapshot` | ✅ Match |
|
||||
| `location.get` | `location.get` | ✅ Match |
|
||||
| `system.notify` | `system.notify` | ✅ Match |
|
||||
| `system.run` | `system.run` | ✅ Match |
|
||||
| `system.run.prepare` | `system.run.prepare` | ✅ Match |
|
||||
| `system.which` | `system.which` | ✅ Match |
|
||||
| `canvas.present` | `canvas.present` | ✅ Match |
|
||||
| `canvas.hide` | `canvas.hide` | ✅ Match |
|
||||
| `canvas.navigate` | `canvas.navigate` | ✅ Match |
|
||||
| `canvas.eval` | `canvas.eval` | ✅ Match |
|
||||
| `canvas.snapshot` | `canvas.snapshot` | ✅ Match |
|
||||
| `canvas.a2ui.push` | `canvas.a2ui.push` | ✅ Match |
|
||||
| `canvas.a2ui.pushJSONL` | `canvas.a2ui.pushJSONL` | ✅ Match (legacy alias) |
|
||||
| `canvas.a2ui.reset` | `canvas.a2ui.reset` | ✅ Match |
|
||||
| `device.info` | `device.info` | ✅ Match |
|
||||
| `device.status` | `device.status` | ✅ Match |
|
||||
| `screen.record` | `screen.record` | ✅ Match (dangerous) |
|
||||
|
||||
### 2.5 Remaining Command Gaps vs Current Mac Node
|
||||
|
||||
| Command | macOS | Windows | Notes |
|
||||
|---------|-------|---------|-------|
|
||||
| `browser.proxy` | ✅ | ✅ | Local browser-control bridge; requires browser control host on gateway port + 2, retries with password/basic auth if bearer auth is rejected, and managed SSH tunnel mode forwards local+2 to remote+2 when enabled |
|
||||
|
||||
### 2.6 Safe Gateway-Policy Gaps to Consider
|
||||
|
||||
The gateway's macOS/iOS default allowlists include other mobile-oriented commands such as contacts, calendar, reminders, photos, and motion. Those remain outside the Windows tray's current companion-node scope.
|
||||
|
||||
---
|
||||
|
||||
## 3. Platform Detection
|
||||
|
||||
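The gateway detects the platform from two fields in the `connect` handshake: `platform` (the primary signal) and `deviceFamily` (the fallback, see the detection logic below):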
|
||||
|
||||
```typescript
|
||||
// Our connect payload
|
||||
client: {
|
||||
platform: "windows", // ← Primary signal
|
||||
mode: "node",
|
||||
}
|
||||
```
|
||||
|
||||
Detection logic (from `node-command-policy.ts`):
|
||||
1. Normalize `platform` → lowercase
|
||||
2. Match against prefix rules: `"win"` → windows, `"mac"/"darwin"` → macos, etc.
|
||||
3. If no match, try `deviceFamily` field
|
||||
4. If still no match → `"unknown"` (gets conservative defaults)
|
||||
|
||||
Our node sends `platform: "windows"`, which correctly matches the `"win"` prefix rule and resolves to `windows`.
|
||||
|
||||
**The problem isn't detection — it's that the `windows` platform intentionally gets a minimal allowlist.** The gateway team designed Windows as a headless exec host, not a full companion app with camera/canvas/screen.
|
||||
|
||||
### 3.1 What "Unknown" Gets (and Why It's Actually Better)
|
||||
|
||||
Ironically, the `unknown` platform gets MORE than Windows:
|
||||
```typescript
|
||||
unknown: [
|
||||
...CANVAS_COMMANDS,
|
||||
...CAMERA_COMMANDS, // camera.list
|
||||
...LOCATION_COMMANDS, // location.get
|
||||
NODE_SYSTEM_NOTIFY_COMMAND,
|
||||
]
|
||||
```
|
||||
|
||||
If we sent a platform string that matches none of the prefix rules above (it could not start with `win`, so something like `platform: "desktop"` rather than `"windows-desktop"`), we'd fall through to `unknown` and actually get canvas/camera/location defaults. But that would be a hack — the right fix is `gateway.nodes.allowCommands`.
|
||||
|
||||
---
|
||||
|
||||
## 4. The QR / Bootstrap Token Flow
|
||||
|
||||
### 4.1 What `openclaw qr` Does
|
||||
|
||||
1. Calls `issueDeviceBootstrapToken()` on the gateway
|
||||
2. Generates a **short-lived, single-use** `bootstrapToken`
|
||||
3. Encodes `{ url, bootstrapToken, expiresAtMs }` as base64url
|
||||
4. Renders as QR code or pasteable setup code
|
||||
|
||||
### 4.2 bootstrapToken vs gateway.auth.token
|
||||
|
||||
| | `bootstrapToken` | `gateway.auth.token` |
|
||||
|---|---|---|
|
||||
| **Purpose** | Initial device pairing | Shared-secret auth for operators |
|
||||
| **Lifetime** | Short-lived, single-use | Permanent until changed |
|
||||
| **Scope** | Node pairing + bounded operator bootstrap | Full operator access |
|
||||
| **Generated by** | `openclaw qr` / `/pair` | User config in `openclaw.json` |
|
||||
| **Auto-approval** | Yes — gateway auto-approves bootstrap-token handshakes | No — manual `devices approve` needed |
|
||||
|
||||
### 4.3 The Auth Cascade (How the Gateway Resolves Auth)
|
||||
|
||||
When a node connects with `auth: { token: "...", bootstrapToken: "..." }`, the gateway tries (from `auth-context.ts`):
|
||||
|
||||
1. **Shared-secret auth** — `auth.token` vs `gateway.auth.token/password`
|
||||
2. **Bootstrap token** — `auth.bootstrapToken` vs issued bootstrap tokens
|
||||
- If valid: `authMethod = "bootstrap-token"`, auto-approved!
|
||||
- Preferred over shared-secret even if both succeed (QR flow relies on this)
|
||||
3. **Device token** — `auth.token` as device-token fallback (for already-paired devices)
|
||||
|
||||
### 4.4 Setup Wizard Entry Points
|
||||
|
||||
The setup code and QR code are the same bootstrap concept in different packaging:
|
||||
|
||||
```text
|
||||
QR image
|
||||
-> decodes to setup code text
|
||||
-> decodes to JSON payload
|
||||
-> contains gateway URL + bootstrapToken + expiry
|
||||
```
|
||||
|
||||
Advanced users can drop into setup at any level:
|
||||
|
||||
| Entry point | User has | Wizard behavior |
|
||||
|---|---|---|
|
||||
| QR image | A saved image file, screenshot, or email attachment containing the QR | Import or paste the image, decode QR text, then decode the setup payload |
|
||||
| Setup code | The pasteable text from `openclaw qr` | Paste the text directly, then decode the setup payload |
|
||||
| Manual URL + token | Gateway URL/IP and a long-lived gateway token | Skip bootstrap; connect with `auth.token` and use manual approval if required |
|
||||
|
||||
The QR/setup-code path is preferred for first-time node onboarding because it avoids telling users to copy permanent gateway secrets and enables auto-approval.
|
||||
|
||||
### 4.5 What Our Setup Wizard Does
|
||||
|
||||
The Windows Setup Wizard:
|
||||
1. Accepts a QR image, clipboard QR image, pasteable setup code, or manual gateway URL/token.
|
||||
2. For QR/setup-code input, decodes `{ url, bootstrapToken, expiresAtMs }`.
|
||||
3. Stores `bootstrapToken` separately from the normal gateway `Token` setting.
|
||||
4. Sends it as `auth.bootstrapToken` in the node connect handshake.
|
||||
|
||||
This lets the gateway correctly classify QR setup as a bootstrap-token handshake, which enables:
|
||||
- Silent auto-approval (no manual `devices approve` needed)
|
||||
- Bootstrap token revocation after pairing
|
||||
- Bounded operator token handoff (if configured)
|
||||
|
||||
### 4.6 Post-Pairing: Device Tokens
|
||||
|
||||
After a successful bootstrap-token pairing:
|
||||
1. Gateway issues a `deviceToken` in `hello-ok.auth.deviceToken`
|
||||
2. Node should **save** this device token
|
||||
3. Future connections use `auth.token = <deviceToken>` (device-token auth path)
|
||||
4. The bootstrap token is revoked and no longer valid
|
||||
|
||||
Windows stores `hello-ok.auth.deviceToken` in its device identity file and prefers that saved device token on future node connections. The bootstrap token is only used when there is no saved device token yet.
|
||||
|
||||
### 4.7 Bootstrap Flow
|
||||
|
||||
```
|
||||
1. User runs `openclaw qr` on gateway host
|
||||
2. User imports/scans QR image or pastes setup code into Windows Setup Wizard
|
||||
3. Wizard decodes → { url, bootstrapToken, expiresAtMs }
|
||||
4. Node connects with: auth: { bootstrapToken: "<token>" }
|
||||
5. Gateway auto-approves pairing (bootstrap-token auth method)
|
||||
6. Gateway returns hello-ok with: auth: { deviceToken: "<token>" }
|
||||
7. Node saves deviceToken to identity store
|
||||
8. Future connections use: auth: { token: "<deviceToken>" }
|
||||
9. No manual `devices approve` needed!
|
||||
```
|
||||
|
||||
Manual URL/token setup remains useful for advanced troubleshooting and environments where QR/bootstrap is unavailable. In that path, the tray may show a pairing notification with an `openclaw devices approve <device-id>` command that must be run on the gateway host.
|
||||
|
||||
---
|
||||
|
||||
## 5. Recommendations
|
||||
|
||||
### 5.0 Design Conclusion: Safe Windows/macOS Parity
|
||||
|
||||
The root issue is not that the gateway fails to recognize Windows. It recognizes Windows correctly. The problem is that `platform: "windows"` currently gets only the headless exec-host defaults, while the Windows tray app is now a full node that can declare canvas, camera, location, and screen capabilities.
|
||||
|
||||
The simplest upstream fix is to make Windows match macOS for **safe declared commands**, while keeping dangerous commands explicit opt-in.
|
||||
|
||||
This does **not** make every Windows node capable of camera/canvas/location/screen. A command still has to pass both gates:
|
||||
|
||||
1. The node must declare the command.
|
||||
2. The gateway policy must allow the command.
|
||||
|
||||
So a headless Windows node host that only declares `system.run` / `system.which` remains exec-only. Expanding the Windows default allowlist just stops the gateway from filtering safe commands that a Windows node explicitly advertises.
|
||||
|
||||
Recommended gateway defaults:
|
||||
|
||||
| Command bucket | Windows default? | Reason |
|
||||
|----------------|------------------|--------|
|
||||
| Safe declared companion commands: `canvas.*`, `camera.list`, `location.get`, `screen.snapshot`, `device.info`, `device.status` | Yes | Matches macOS parity and only applies when declared by the node |
|
||||
| Dangerous/privacy-heavy commands: `camera.snap`, `camera.clip`, `screen.record`, `stt.transcribe`, write commands like `contacts.add` | No | Existing gateway model already requires explicit `gateway.nodes.allowCommands` |
|
||||
| Exec commands: `system.run`, `system.run.prepare`, `system.which`, `system.notify`, `browser.proxy` | Yes | Existing Windows headless-host behavior |
|
||||
|
||||
Until the gateway expands Windows safe defaults, the practical local solution is:
|
||||
|
||||
1. Keep declaring the correct command names from the Windows node.
|
||||
2. Configure `gateway.nodes.allowCommands` for the Windows companion features.
|
||||
3. Re-pair after command-list changes because the gateway snapshots commands at approval time.
|
||||
|
||||
### 5.1 Gateway Node Allowlist Configuration
|
||||
|
||||
`gateway.nodes.allowCommands` is the explicit opt-in list the gateway uses after platform defaults. It should contain exact command names, not broad wildcard grants, for commands that are safe but not yet in the Windows default policy.
|
||||
|
||||
Recommended safe Windows companion allowlist:
|
||||
|
||||
```bash
|
||||
openclaw config set gateway.nodes.allowCommands '["canvas.present","canvas.hide","canvas.navigate","canvas.eval","canvas.snapshot","canvas.a2ui.push","canvas.a2ui.pushJSONL","canvas.a2ui.reset","camera.list","location.get","screen.snapshot","device.info","device.status","system.execApprovals.get","system.execApprovals.set"]'
|
||||
openclaw gateway restart
|
||||
```
|
||||
|
||||
`gateway.nodes.denyCommands` can be used as a final explicit blocklist when you want to suppress a command even if a platform default or allowlist entry would otherwise allow it.
|
||||
|
||||
Privacy-sensitive commands should stay out of the default safe list and should only be added deliberately:
|
||||
|
||||
```text
|
||||
camera.snap
|
||||
camera.clip
|
||||
screen.record
|
||||
stt.transcribe
|
||||
```
|
||||
|
||||
After changing either `gateway.nodes.allowCommands` or `gateway.nodes.denyCommands`, re-approve or re-pair the Windows node. Approved device records may keep a snapshot of the commands that were visible at approval time, so a gateway restart alone may not refresh existing approvals.
|
||||
|
||||
### 5.2 Immediate Code Fixes (This Branch)
|
||||
|
||||
- [x] Rename `screen.capture` → `screen.snapshot` in `ScreenCapability.cs`
|
||||
- [x] Remove `screen.list` from declared commands
|
||||
- [x] Remove debug logging from `WindowsNodeClient.cs`
|
||||
- [x] Add Mac-compatible fixed-duration `screen.record`; do not add `screen.list` or record start/stop commands
|
||||
|
||||
### 5.3 Setup Wizard Improvements
|
||||
|
||||
- [x] Send `bootstrapToken` in correct field: `auth.bootstrapToken` not `auth.token`
|
||||
- [x] Handle `hello-ok.auth.deviceToken` — save it for future connections
|
||||
- [x] Accept QR images and clipboard setup content as alternate ways to enter the same bootstrap payload
|
||||
- [x] Show "auto-paired!" vs "waiting for approval" based on auth method
|
||||
- [x] Handle bootstrap token expiry gracefully when setup code payloads include expiry metadata (`expiresAt`, `expires_at`, `expires`, `expiry`, or `exp`)
|
||||
- [x] Add Settings toggles for optional Windows node capability groups (`canvas`, `screen`, `camera`, `location`, `browser.proxy`)
|
||||
|
||||
### 5.4 Upstream Contributions / Issues to File
|
||||
|
||||
- [x] **Request Windows/macOS parity for safe declared commands** — Windows should allow the same safe companion commands macOS does, while dangerous commands stay explicit opt-in. Draft included below.
|
||||
- [x] **Document `gateway.nodes.allowCommands`** — local Windows integration docs now describe allowCommands, denyCommands, safe parity commands, privacy-sensitive opt-ins, and re-pair requirements.
|
||||
- [x] **Add `canvas.a2ui.pushJSONL`** — current Mac supports it as a legacy JSONL alias; Windows routes it through the same A2UI push handler
|
||||
|
||||
#### Upstream issue draft
|
||||
|
||||
**Title:** Expand Windows node default allowlist for safe declared companion commands
|
||||
|
||||
**Body:**
|
||||
|
||||
Windows nodes are currently treated like Linux/headless exec hosts in `src/gateway/node-command-policy.ts`:
|
||||
|
||||
```ts
|
||||
windows: [...SYSTEM_COMMANDS]
|
||||
```
|
||||
|
||||
That means the gateway filters out safe companion-app commands that a Windows node explicitly declares, including `canvas.*`, `camera.list`, `location.get`, and `screen.snapshot`. The Windows tray app is now a full companion node, not just an exec host, so this causes confusing behavior: the node can implement and advertise a command, but the gateway drops/rejects it unless users manually configure `gateway.nodes.allowCommands`.
|
||||
|
||||
Proposal:
|
||||
|
||||
- Add safe declared companion commands to Windows defaults, similar to macOS:
|
||||
- `canvas.present`
|
||||
- `canvas.hide`
|
||||
- `canvas.navigate`
|
||||
- `canvas.eval`
|
||||
- `canvas.snapshot`
|
||||
- `canvas.a2ui.push`
|
||||
- `canvas.a2ui.pushJSONL`
|
||||
- `canvas.a2ui.reset`
|
||||
- `camera.list`
|
||||
- `location.get`
|
||||
- `screen.snapshot`
|
||||
- `device.info`
|
||||
- `device.status`
|
||||
- Keep dangerous/privacy-heavy commands explicit opt-in via `gateway.nodes.allowCommands`:
|
||||
- `camera.snap`
|
||||
- `camera.clip`
|
||||
- `screen.record`
|
||||
- `stt.transcribe`
|
||||
- write commands such as `contacts.add`, `calendar.add`, etc.
|
||||
|
||||
This does not grant capabilities to headless Windows hosts by itself. A command still has to pass both gates: the node must declare it in `commands`, and the gateway policy must allow it. Headless Windows node hosts that only declare `system.run` / `system.which` remain exec-only.
|
||||
|
||||
Related documentation gap: `gateway.nodes.allowCommands` and `gateway.nodes.denyCommands` should be documented in the gateway configuration reference, including the requirement to re-pair after command-list changes because approved pairing records snapshot declared commands.
|
||||
|
||||
### 5.5 User-Facing Documentation
|
||||
|
||||
When shipping the Windows node, README/wiki should tell users:
|
||||
|
||||
> **First-time setup**: After pairing your Windows node, add these commands to your gateway config:
|
||||
> ```bash
|
||||
> openclaw config set gateway.nodes.allowCommands '["canvas.present", "canvas.hide", "canvas.navigate", "canvas.eval", "canvas.snapshot", "canvas.a2ui.push", "canvas.a2ui.pushJSONL", "canvas.a2ui.reset", "camera.list", "screen.snapshot", "location.get", "device.info", "device.status", "system.execApprovals.get", "system.execApprovals.set"]'
|
||||
> openclaw gateway restart
|
||||
> ```
|
||||
> Then re-pair the node (`openclaw devices reject <old-id>` + re-approve).
|
||||
>
|
||||
> Add `camera.snap`, `camera.clip`, `screen.record`, and `stt.transcribe` only when you explicitly want to allow privacy-sensitive camera, screen, or microphone capture.
|
||||
>
|
||||
> The Windows tray Command Center (`openclaw://commandcenter`) surfaces these policy problems directly: it separates safe companion allowlist fixes from privacy-sensitive opt-ins and provides copyable repair text for safe fixes or pending pairing approval.
|
||||
|
||||
---
|
||||
|
||||
## 6. Reference: Gateway Source Files
|
||||
|
||||
| File | What It Does |
|
||||
|------|-------------|
|
||||
| `src/gateway/node-command-policy.ts` | Platform allowlists, dangerous commands, command filtering |
|
||||
| `src/gateway/device-metadata-normalization.ts` | Platform string normalization |
|
||||
| `src/infra/node-commands.ts` | Constants: `system.run/which/notify`, `browser.proxy`, `execApprovals.*` |
|
||||
| `src/gateway/server/ws-connection/auth-context.ts` | Auth cascade: shared-secret → bootstrap-token → device-token |
|
||||
| `extensions/device-pair/index.ts` | QR generation, bootstrap token issuance, pairing flow |
|
||||
| `src/cli/nodes-screen.ts` | CLI screen record helpers (confirms `screen.record` naming) |
|
||||
369
docs/wsl-owner-open-issues.md
Normal file
369
docs/wsl-owner-open-issues.md
Normal file
@ -0,0 +1,369 @@
|
||||
# OpenClaw Windows local gateway: WSL-owner Q&A
|
||||
|
||||
This document is the structured record of the questions we asked Craig Loewen
|
||||
(WSL) about the Windows OpenClaw local-gateway design, and Craig's answers.
|
||||
It is the canonical "why does the architecture look like this?" reference
|
||||
for the Windows local-gateway PR.
|
||||
|
||||
Companion: [`docs/wsl-owner-validation.md`](wsl-owner-validation.md)
|
||||
describes the resulting design as it ships.
|
||||
|
||||
**Status legend:** ✅ Answered (verbatim or paraphrased Craig answer
|
||||
recorded). 🟡 Open.
|
||||
|
||||
**Source:** Craig Loewen's review of the prototype `wsl-owner-open-issues.md`
|
||||
(2026-05-04). His answers are summarized authoritatively in
|
||||
`.squad/decisions.md` under "Decision: Craig Loewen's WSL Answers
|
||||
(Authoritative)" and underpinned the Phase 3 plan revision in
|
||||
`.squad/decisions-archive.md`. The architecture statements below are
|
||||
paraphrased; Mike's relayed verbatim Q&A lives in the squad decisions thread,
|
||||
not in the public PR.
|
||||
|
||||
The design is built on three coupled choices:
|
||||
|
||||
1. **Distribution model:** create a dedicated `OpenClawGateway` instance from
|
||||
the Store Ubuntu-24.04 package and configure it post-install — no custom
|
||||
OpenClaw rootfs.
|
||||
2. **Networking model:** loopback only between the Windows tray and the
|
||||
gateway in WSL — no WSL-IP fallback, no `lan`/`auto` bind.
|
||||
3. **Lifecycle model:** instance-scoped `wsl --terminate OpenClawGateway` for
|
||||
repair; user-systemd plus a tray-owned keepalive for liveness; no global
|
||||
`wsl --shutdown` and no global `.wslconfig` mutation.
|
||||
|
||||
The goal remains a low-maintenance implementation that uses the public
|
||||
OpenClaw Linux installer unchanged and does not maintain a custom OpenClaw
|
||||
Linux distribution.
|
||||
|
||||
## Final shape
|
||||
|
||||
1. The Windows tray verifies WSL/WSL2 availability.
|
||||
2. The tray creates a dedicated WSL2 instance named `OpenClawGateway` from
|
||||
the Store Ubuntu-24.04 package:
|
||||
```powershell
|
||||
wsl.exe --install Ubuntu-24.04 `
|
||||
--name OpenClawGateway `
|
||||
--location "$env:LOCALAPPDATA\OpenClawTray\wsl" `
|
||||
--no-launch `
|
||||
--version 2
|
||||
```
|
||||
3. The tray launches the instance as root and applies OpenClaw-owned
|
||||
configuration:
|
||||
- create the `openclaw` user;
|
||||
- create `/home/openclaw/.openclaw`, `/opt/openclaw`,
|
||||
`/var/lib/openclaw`, and `/var/log/openclaw`;
|
||||
- write `/etc/wsl.conf` and `/etc/wsl-distribution.conf`;
|
||||
- set the default user to `openclaw` via
|
||||
`wsl --manage OpenClawGateway --set-default-user openclaw`;
|
||||
- terminate only `OpenClawGateway` so WSL config takes effect.
|
||||
4. The tray runs the public OpenClaw Linux installer inside the instance:
|
||||
`https://openclaw.ai/install-cli.sh` with prefix `/opt/openclaw`. No
|
||||
forked or patched gateway installer.
|
||||
5. The tray uses upstream OpenClaw CLI/service commands to configure and
|
||||
start the gateway.
|
||||
6. The tray calls upstream `openclaw qr --json`, consumes the upstream
|
||||
setup-code/bootstrap-token handoff, and pairs Windows tray operator and
|
||||
Windows tray node sessions; both device tokens land in
|
||||
`%APPDATA%\OpenClawTray\device-key-ed25519.json`.
|
||||
|
||||
## Issue 1: Ubuntu Store package + post-install configuration
|
||||
|
||||
### Q1.1 — Is `wsl --install Ubuntu-24.04 --name OpenClawGateway --location ... --no-launch --version 2` a supported primitive for a Windows app creating a dedicated app-owned WSL instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Yes — supportable. This is the canonical primitive for an
|
||||
app-owned WSL instance.
|
||||
|
||||
**Implication:** `LocalGatewaySetup.cs` issues exactly this command. The
|
||||
clean port removed `--web-download`, `--from-file`, and any rootfs-import
|
||||
fallback.
|
||||
|
||||
### Q1.2 — Is it acceptable to treat the install as successful when post-conditions pass, even if the `wsl --install` process itself hangs or exits unclearly?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Trust the exit code.** The hang-fallback pattern from the
|
||||
prototype is not needed.
|
||||
|
||||
**Implication:** The clean engine treats `wsl --install` exit 0 as the
|
||||
success signal, and additionally confirms `OpenClawGateway` appears in
|
||||
`wsl --list --quiet` to defend against the "winget-style" failure mode where
|
||||
exit 0 reports success without registering a distro (see Q1.3). Non-zero
|
||||
exit ⇒ install failure; no postcondition-on-hang path.
|
||||
|
||||
### Q1.3 — Should we prefer generic `Ubuntu`, explicit `Ubuntu-24.04`, `--web-download`, `--from-file`, or another source for the default path?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Use **explicit `Ubuntu-24.04`**, not generic `Ubuntu`. No
|
||||
`--web-download` and no `--from-file` are needed.
|
||||
|
||||
**Implication:** The clean install command is pinned to `Ubuntu-24.04`. The
|
||||
prototype's "generic `Ubuntu` channel was more reliable on this dev machine"
|
||||
observation is not a basis for a final product default.
|
||||
|
||||
Empirical confirmation (2026-05-04, 20-iteration harness — 10 runs per path — on Windows 10.0.26200,
|
||||
WSL 2.6.3.0): `wsl --install Ubuntu-24.04 --name <gen> --location <path>
|
||||
--no-launch --version 2` succeeded **10/10**; `winget install --id
|
||||
Canonical.Ubuntu.2404 -e --silent --accept-source-agreements
|
||||
--accept-package-agreements --disable-interactivity` succeeded **0/10**
|
||||
(stages the launcher APPX but never registers a WSL distro under
|
||||
`--silent --disable-interactivity`). Raw artifacts:
|
||||
`artifacts/wsl-install-vs-winget/run-20260504-131837/summary.json`.
|
||||
|
||||
### Q1.4 — What is the recommended enterprise/offline fallback when Store access is blocked?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Modern WSL distributions are no longer Store-gated; an offline
|
||||
fallback is **not needed** for this PR.
|
||||
|
||||
**Implication:** No offline fallback path ships in this PR. If a future
|
||||
enterprise scenario surfaces a real blocker, that decision can be revisited
|
||||
separately.
|
||||
|
||||
### Q1.5 — Are `automount=false`, `interop=false`, and `appendWindowsPath=false` appropriate for this managed instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Yes — all three settings are appropriate for an app-owned
|
||||
appliance.
|
||||
|
||||
**Implication:** `/etc/wsl.conf` ships with all three disabled (see
|
||||
`docs/wsl-owner-validation.md`).
|
||||
|
||||
### Q1.6 — Are there WSL/systemd/machine-id/DNS/timezone details we should explicitly repair or validate after cloning/configuring an Ubuntu instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No post-clone repairs needed** — machine-id / DNS / timezone
|
||||
work as delivered.
|
||||
|
||||
**Implication:** The setup engine does not regenerate `/etc/machine-id`,
|
||||
does not rewrite `/etc/resolv.conf`, and does not touch timezone state. It
|
||||
relies on `useWindowsTimezone=true` in `/etc/wsl.conf` for clock alignment.
|
||||
|
||||
### Q1.7 — Should OpenClaw avoid writing `/etc/wsl-distribution.conf`, or is it appropriate to suppress shortcuts/terminal profile for the dedicated instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Use both `wsl.conf` and `wsl-distribution.conf`. Suppressing
|
||||
shortcut/terminal entries is the correct application of
|
||||
`wsl-distribution.conf` for a privately managed instance.
|
||||
|
||||
**Implication:** The setup engine writes `/etc/wsl-distribution.conf` with
|
||||
`shortcut.enabled=false` and `terminal.enabled=false`.
|
||||
|
||||
## Issue 2: Local networking between Windows and the WSL gateway
|
||||
|
||||
### Q2.1 — Is Windows localhost forwarding to a WSL2 service reliable enough to make `loopback` the final default?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Yes — loopback only.** Windows localhost forwarding to a WSL2
|
||||
service is a reliable core WSL promise.
|
||||
|
||||
**Implication:** Gateway binds to loopback inside WSL on `:18789`. Windows
|
||||
tray connects via `http://localhost:18789` / `ws://localhost:18789`. The
|
||||
prototype's earlier observations of localhost-forwarding flakiness were
|
||||
attributed to other lifecycle issues (see Issue 3) and not to the forwarding
|
||||
contract itself.
|
||||
|
||||
### Q2.2 — If localhost forwarding fails, is WSL-IP fallback a supported/recommended pattern for a Windows app-owned WSL instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** WSL-IP fallback is not the recommended pattern.
|
||||
|
||||
**Implication:** The clean port has **no** WSL-IP fallback. The endpoint
|
||||
resolver does not enumerate WSL interface addresses, does not run
|
||||
`hostname -I` / `ip -4 addr` / `ip route` / `ss -ltnp` inside WSL, and
|
||||
returns exactly one candidate: `http://localhost:18789`.
|
||||
|
||||
### Q2.3 — Is `gateway.bind=lan` inside the WSL instance acceptable for the fallback path, assuming the Windows tray still only advertises/selects local endpoints by default?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No** — loopback only.
|
||||
|
||||
**Implication:** The setup engine never writes `gateway.bind=lan`. The
|
||||
runtime configuration surface for `gateway.bind` was removed.
|
||||
|
||||
### Q2.4 — Should we implement `auto` bind promotion instead of defaulting to `lan`?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** Loopback only; no `auto` promotion.
|
||||
|
||||
**Implication:** No promotion logic exists in the clean port. There is one
|
||||
bind mode, and it is loopback.
|
||||
|
||||
### Q2.5 — Are there WSL NAT, mirrored networking, firewall, or portproxy recommendations we should follow while still avoiding global `.wslconfig` changes?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** No — loopback forwarding works without any of those
|
||||
modifications.
|
||||
|
||||
**Implication:** The tray does not write to `.wslconfig`, does not configure
|
||||
mirrored networking, does not add Windows firewall rules, and does not run
|
||||
`netsh interface portproxy` for normal local-gateway operation.
|
||||
|
||||
### Q2.6 — What diagnostics should we capture before asking users/maintainers to file WSL networking bugs?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Point at **<https://aka.ms/wsllogs>**. Do not scrape WSL internal
|
||||
log files from the product.
|
||||
|
||||
**Implication:** On any setup or networking failure, the
|
||||
`LocalSetupProgressPage` shows an aka.ms/wsllogs hint, the validation
|
||||
script's `Save-DiagnosticsSnapshot` records `wslLogsHelp =
|
||||
https://aka.ms/wsllogs`, and the run summary appends a "Diagnostics: see
|
||||
https://aka.ms/wsllogs..." note. The product captures only its own state
|
||||
(Windows-side `:18789` listener snapshot, loopback `/health` probe,
|
||||
redacted setup-state.json) and a generated repro guide.
|
||||
|
||||
## Issue 3: WSL gateway lifecycle and service ownership
|
||||
|
||||
### Q3.1 — For an app-owned WSL appliance, should the gateway be a user-systemd service, a root/system service wrapper, or something else?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Both **user-systemd** and a **tray-owned keepalive** are
|
||||
acceptable for this shape.
|
||||
|
||||
**Implication:** The clean port uses upstream OpenClaw service primitives
|
||||
under the `openclaw` user, plus a tray-owned WSL keepalive
|
||||
(`wsl.exe -d OpenClawGateway -u openclaw -- sleep 2147483647`) while
|
||||
local-gateway mode is active. Readiness still requires Windows-side
|
||||
`/health` to succeed — `systemctl is-active` reporting `active` alone does not imply Windows
|
||||
reachability.
|
||||
|
||||
### Q3.2 — Is `loginctl enable-linger openclaw` expected to be reliable in this WSL shape, or should we avoid depending on it?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Linger is acceptable for this shape (alongside the tray
|
||||
keepalive).
|
||||
|
||||
**Implication:** Setup runs `loginctl enable-linger openclaw`. The tray
|
||||
keepalive remains as belt-and-suspenders for the active local-gateway
|
||||
window.
|
||||
|
||||
### Q3.3 — Is a tray-owned keepalive process acceptable, or should it be treated as validation-only?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Acceptable as a product primitive (see Q3.1). It is not
|
||||
validation-only.
|
||||
|
||||
**Implication:** The keepalive ships as part of the runtime, not just as a
|
||||
test scaffold.
|
||||
|
||||
### Q3.4 — Is instance-scoped `wsl --terminate OpenClawGateway` the right repair/restart primitive?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Yes.** Use `wsl --terminate OpenClawGateway` only. **Never**
|
||||
global `wsl --shutdown`.
|
||||
|
||||
**Implication:** Setup, repair, validation, and removal paths all use
|
||||
`wsl --terminate OpenClawGateway`. `git grep 'wsl --shutdown'` over the
|
||||
clean worktree returns no product or validation hits.
|
||||
|
||||
### Q3.5 — Are there cases where global `wsl --shutdown` is recommended or unavoidable, despite our desire to avoid it?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** Do not issue `wsl --shutdown` from this product.
|
||||
|
||||
**Implication:** Recreate / FreshMachine validation scenarios use
|
||||
`wsl --unregister OpenClawGateway` for destructive cleanup. They never
|
||||
issue a global shutdown.
|
||||
|
||||
### Q3.6 — What lifecycle diagnostics should the tray collect when WSL reports the service active but Windows cannot connect?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Same answer as Q2.6 — point at <https://aka.ms/wsllogs>; the
|
||||
product should not scrape WSL logs.
|
||||
|
||||
**Implication:** The product collects only its own state and points at the
|
||||
WSL-team-owned diagnostics page. See Q2.6.
|
||||
|
||||
## Mac app comparison: operator vs node
|
||||
|
||||
The macOS app runs operator/UI and a local Mac node from the same app
|
||||
binary/process via separate gateway sessions:
|
||||
|
||||
- `GatewayConnection.shared` owns one `GatewayChannelActor` for
|
||||
operator/UI scopes (`role: "operator"`, `clientMode: "ui"`).
|
||||
- `MacNodeModeCoordinator.shared.start()` owns a separate
|
||||
`GatewayNodeSession` and `MacNodeRuntime` (`role: "node"`,
|
||||
`clientId: "openclaw-macos"`, capabilities for canvas / screen / browser
|
||||
/ etc.), connecting to the same gateway URL over a distinct WebSocket.
|
||||
- In local mode, `GatewayProcessManager` manages the local gateway via
|
||||
launchd / OpenClaw CLI behavior; in remote mode,
|
||||
`ConnectionModeCoordinator` stops the local gateway and uses
|
||||
`NodeServiceManager.start()` against the remote gateway.
|
||||
|
||||
**Implication for Windows (decided by Mike):** The Windows tray pairs as
|
||||
**both operator and node** against the local gateway, mirroring the macOS
|
||||
in-app node model. There is **no separate WSL-internal worker** in this
|
||||
PR. `StartWorker` / `PairWorker` phases were dropped; the
|
||||
`PreserveWorkerData` parameter and `worker_data_preserved` lifecycle step
|
||||
were removed in Phase 3 cleanup.
|
||||
|
||||
If a future scope adds a Linux worker inside the WSL gateway instance, it
|
||||
will require a separate upstream-supported install/start/list proof and a
|
||||
new owner decision — not a re-litigation of the current PR.
|
||||
|
||||
## Architectural decisions captured
|
||||
|
||||
For traceability, the high-order decisions implied by Craig's answers are:
|
||||
|
||||
1. **Distribution model** — Store Ubuntu-24.04 + post-install configuration;
|
||||
no custom rootfs; no offline fallback. (Q1.1, Q1.3, Q1.4)
|
||||
2. **Configuration** — `wsl.conf` (systemd, automount/interop/appendWindowsPath
|
||||
off, default user `openclaw`, `useWindowsTimezone=true`) +
|
||||
`wsl-distribution.conf` (no shortcut, no terminal). No post-clone
|
||||
repairs. (Q1.5, Q1.6, Q1.7)
|
||||
3. **Networking** — Loopback only, port 18789. No WSL-IP fallback. No
|
||||
`lan`/`auto` bind. No `.wslconfig` / portproxy / firewall mutation.
|
||||
(Q2.1–Q2.5)
|
||||
4. **Lifecycle** — User-systemd + tray keepalive. Linger acceptable.
|
||||
`wsl --terminate OpenClawGateway` for repair. **Never** global
|
||||
`wsl --shutdown`. (Q3.1–Q3.5)
|
||||
5. **Diagnostics** — `https://aka.ms/wsllogs`. No internal log scraping.
|
||||
(Q2.6, Q3.6)
|
||||
6. **Roles in scope** — Windows tray operator + Windows tray node.
|
||||
Worker-in-WSL out of scope. (Mac app comparison + Mike's Phase-0
|
||||
decision.)
|
||||
|
||||
These decisions are reflected one-for-one in:
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Services/LocalGatewaySetup/LocalGatewaySetup.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs` (factory + identity-path wiring)
|
||||
- `src/OpenClaw.Tray.WinUI/Services/NodeService.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Pages/SetupWarningPage.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Pages/LocalSetupProgressPage.cs`
|
||||
- `scripts/validate-wsl-gateway.ps1` (4 scenarios)
|
||||
- `scripts/reset-openclaw-wsl-validation-state.ps1` (exact-target gated
|
||||
cleanup)
|
||||
|
||||
## Open follow-ups
|
||||
|
||||
These are not open architecture questions for Craig — they are tracked
|
||||
work items that intentionally fall outside this PR:
|
||||
|
||||
- **Off-box / LAN / phone reachability via OpenClaw relay.** Blocked on
|
||||
relay ownership / protocol clarity. Not addressed in this PR.
|
||||
- **`winget install Microsoft.WSL` as a platform repair fallback.** Deeper
|
||||
research in flight; does not change the Phase 3 decision to use
|
||||
`wsl --install` for distro creation in this PR.
|
||||
- **Onboarding copy localization.** `Onboarding_SetupWarning_*` /
|
||||
`Onboarding_LocalSetupProgress_*` resw entries to be added across
|
||||
supported locales after Mike signs off final copy.
|
||||
|
||||
No open questions for Craig remain that block this PR.
|
||||
384
docs/wsl-owner-validation.md
Normal file
384
docs/wsl-owner-validation.md
Normal file
@ -0,0 +1,384 @@
|
||||
# OpenClaw Windows local gateway: WSL design validation
|
||||
|
||||
This document describes the WSL design that ships in this PR. It reflects Craig
|
||||
Loewen's authoritative review of `docs/wsl-owner-open-issues.md` (his answers are
|
||||
recorded, paraphrased, in that companion doc). Where the prototype enumerated
|
||||
options, this version states the chosen design.
|
||||
|
||||
The current scope is:
|
||||
|
||||
- A dedicated app-owned **Ubuntu-24.04** WSL2 instance named `OpenClawGateway`,
|
||||
created from the standard Ubuntu Store package and then configured by the
|
||||
Windows tray.
|
||||
- The public OpenClaw Linux installer (`https://openclaw.ai/install-cli.sh`)
|
||||
runs unchanged inside that instance with prefix `/opt/openclaw`.
|
||||
- **Loopback-only** local networking (`http://localhost:18789`) between the
|
||||
Windows tray and the gateway.
|
||||
- Repair / restart via instance-scoped `wsl --terminate OpenClawGateway`.
|
||||
- Diagnostics on failure pointed at <https://aka.ms/wsllogs>.
|
||||
- The Windows tray pairs as both **operator** and **node** against the local
|
||||
gateway (matching the macOS app's in-app node model). No worker-in-WSL is
|
||||
installed by the Windows tray in this PR.
|
||||
|
||||
Out of scope for this PR (explicitly):
|
||||
|
||||
- No custom OpenClaw rootfs / OpenClaw-distributed Linux image.
|
||||
- No `--web-download` / `--from-file` / signed offline-base-artifact fallback.
|
||||
- No WSL-IP / `lan` / `auto`-bind fallback. No `gateway.bind` overrides.
|
||||
- No global `.wslconfig` mutation. No global `wsl --shutdown` from any product
|
||||
or validation path.
|
||||
- No `\\wsl$` or `\\wsl.localhost` file I/O. All WSL file operations go through
|
||||
`wsl.exe -d OpenClawGateway -- ...`.
|
||||
|
||||
## High-level user experience
|
||||
|
||||
1. User installs or opens the Windows tray app.
|
||||
2. The first onboarding page (`SetupWarningPage`) offers **Set up locally**
|
||||
(default) or **Advanced setup**.
|
||||
3. **Set up locally** opens `LocalSetupProgressPage`, which drives
|
||||
`LocalGatewaySetupEngine` to:
|
||||
- preflight the WSL host;
|
||||
- create the `OpenClawGateway` instance from Ubuntu-24.04;
|
||||
- apply OpenClaw-owned WSL configuration (`/etc/wsl.conf`,
|
||||
`/etc/wsl-distribution.conf`, `openclaw` user, state directories);
|
||||
- install OpenClaw via the public installer;
|
||||
- prepare and start the gateway service;
|
||||
- mint a bootstrap setup-code via `openclaw qr --json`;
|
||||
- pair the Windows tray operator and Windows tray node;
|
||||
- verify end-to-end reachability over loopback.
|
||||
4. On terminal failure, the page surfaces a link to <https://aka.ms/wsllogs>;
|
||||
no internal log scraping is attempted.
|
||||
|
||||
## End-state architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Windows["Windows user session"]
|
||||
Tray["OpenClaw Tray app"]
|
||||
Identity["%APPDATA%\OpenClawTray\<br/>device-key-ed25519.json (operator + node)"]
|
||||
Engine["LocalGatewaySetupEngine"]
|
||||
WslFeature["Windows WSL platform"]
|
||||
end
|
||||
|
||||
subgraph WSL["WSL2: OpenClawGateway"]
|
||||
Ubuntu["Ubuntu-24.04 (Store)"]
|
||||
WslConf["/etc/wsl.conf<br/>systemd=true<br/>automount=false<br/>interop=false<br/>appendWindowsPath=false<br/>default user=openclaw"]
|
||||
DistroConf["/etc/wsl-distribution.conf<br/>shortcut=false<br/>terminal=false"]
|
||||
Systemd["systemd"]
|
||||
Installer["public installer<br/>install-cli.sh<br/>--prefix /opt/openclaw"]
|
||||
GatewaySvc["openclaw gateway<br/>bind=loopback :18789"]
|
||||
State["/var/lib/openclaw"]
|
||||
end
|
||||
|
||||
Tray --> Engine
|
||||
Engine -->|"wsl --install Ubuntu-24.04 --name OpenClawGateway --location %LOCALAPPDATA%\OpenClawTray\wsl --no-launch --version 2"| WslFeature
|
||||
WslFeature --> Ubuntu
|
||||
Ubuntu --> WslConf
|
||||
Ubuntu --> DistroConf
|
||||
WslConf --> Systemd
|
||||
Engine -->|"wsl -d OpenClawGateway -u root -- bash install-cli.sh"| Installer
|
||||
Installer --> GatewaySvc
|
||||
Systemd --> GatewaySvc
|
||||
GatewaySvc --> State
|
||||
Tray -->|"http://localhost:18789 (operator + node WebSocket sessions)"| GatewaySvc
|
||||
Tray --> Identity
|
||||
```
|
||||
|
||||
## WSL touch points
|
||||
|
||||
### Dedicated WSL instance lifecycle
|
||||
|
||||
The tray treats WSL as an application-owned runtime boundary and uses a single
|
||||
dedicated WSL2 instance named `OpenClawGateway`. The base is **Ubuntu-24.04**
|
||||
from the Store; the OpenClaw-owned configuration is applied after the instance
|
||||
is laid down.
|
||||
|
||||
| Operation | WSL command | Scope |
|
||||
| --- | --- | --- |
|
||||
| Preflight | `wsl.exe --status`, `wsl.exe --list --verbose` | Read-only WSL capability checks |
|
||||
| Instance creation | `wsl.exe --install Ubuntu-24.04 --name OpenClawGateway --location %LOCALAPPDATA%\OpenClawTray\wsl --no-launch --version 2` | Creates only the dedicated OpenClaw instance |
|
||||
| In-instance configuration | `wsl.exe -d OpenClawGateway -u root -- ...` | Writes `/etc/wsl.conf`, `/etc/wsl-distribution.conf`, creates `openclaw` user and state dirs |
|
||||
| Default user | `wsl.exe --manage OpenClawGateway --set-default-user openclaw` | Locks default user to `openclaw` |
|
||||
| Apply config | `wsl.exe --terminate OpenClawGateway` (then implicit restart on next command) | Picks up `wsl.conf` changes |
|
||||
| Public OpenClaw install | `wsl.exe -d OpenClawGateway -u root -- bash -c "curl -fsSL https://openclaw.ai/install-cli.sh \| bash -s -- --prefix /opt/openclaw"` | Runs the public installer unchanged |
|
||||
| Service start/check | `wsl.exe -d OpenClawGateway -u root -- systemctl ...` | Starts/checks OpenClaw gateway |
|
||||
| Repair | `wsl.exe --terminate OpenClawGateway` | Instance-scoped restart only |
|
||||
| Remove | `wsl.exe --terminate OpenClawGateway`, `wsl.exe --unregister OpenClawGateway` | Requires explicit user confirmation |
|
||||
|
||||
Guarantees:
|
||||
|
||||
- **WSL2 only** for the OpenClaw instance.
|
||||
- The tray never modifies the user's default WSL instance.
|
||||
- The tray never modifies global `.wslconfig`.
|
||||
- The tray never calls global `wsl.exe --shutdown` in any product, validation,
|
||||
repair, or removal path.
|
||||
- The tray never unregisters arbitrary WSL instances; only the exact
|
||||
`OpenClawGateway` name is eligible, and destructive cleanup requires explicit
|
||||
confirmation in scripts.
|
||||
|
||||
### Install command and success criterion
|
||||
|
||||
The single canonical install primitive is:
|
||||
|
||||
```powershell
|
||||
wsl.exe --install Ubuntu-24.04 `
|
||||
--name OpenClawGateway `
|
||||
--location "$env:LOCALAPPDATA\OpenClawTray\wsl" `
|
||||
--no-launch `
|
||||
--version 2
|
||||
```
|
||||
|
||||
Success criterion (per Craig): **trust the `wsl --install` exit code**.
|
||||
There is no postcondition-on-hang fallback. After a zero exit, the engine also
|
||||
confirms that `OpenClawGateway` appears in `wsl --list --quiet`; failure of that
|
||||
postcondition is treated as install failure even though the exit code was 0.
|
||||
|
||||
`Ubuntu-24.04` is used explicitly (not the generic `Ubuntu` channel). No
|
||||
`--web-download` and no `--from-file` are used; there is no offline base
|
||||
fallback in this PR.
|
||||
|
||||
#### Empirical evidence
|
||||
|
||||
The documented recommendation (`wsl --install` over `winget install
|
||||
Canonical.Ubuntu.2404`) was confirmed empirically on 2026-05-04 with a
|
||||
20-iteration harness (10 runs per path):
|
||||
|
||||
| Path | success | failure | strict success rate |
|
||||
|---|---:|---:|---|
|
||||
| `wsl --install Ubuntu-24.04 --name <gen> --location <path> --no-launch --version 2` | 10 | 0 | **10/10** |
|
||||
| `winget install --id Canonical.Ubuntu.2404 -e --silent --accept-source-agreements --accept-package-agreements --disable-interactivity` | 0 | 10 | **0/10** |
|
||||
|
||||
Success ≡ exit 0 AND target distro registered in `wsl --list --quiet`.
|
||||
|
||||
Root cause for winget 0/10: `Canonical.Ubuntu.2404` is the launcher APPX, not
|
||||
a WSL distro creator; with `--silent --disable-interactivity` the launcher is
|
||||
never invoked, so the APPX stages but no distro registers. winget cannot pass
|
||||
`--name` or `--location` to the launcher.
|
||||
|
||||
Harness, raw timings, exit codes, and per-iteration `detail.json`:
|
||||
`artifacts/wsl-install-vs-winget/run-20260504-131837/summary.json`. (The
|
||||
`artifacts/` tree is gitignored; the summary will be present on any host that
|
||||
runs `scripts/experiments/wsl-install-vs-winget-empirical-2026-05-04.ps1`.)
|
||||
|
||||
A deeper winget research thread is in flight (Aaron-9, prototype worktree).
|
||||
That work may broaden the picture for `winget install Microsoft.WSL` as a
|
||||
**platform** repair fallback — it does not change the Phase 3 decision to use
|
||||
`wsl --install` for distro creation in this PR.
|
||||
|
||||
### `/etc/wsl.conf`
|
||||
|
||||
```ini
|
||||
[boot]
|
||||
systemd=true
|
||||
|
||||
[automount]
|
||||
enabled=false
|
||||
mountFsTab=false
|
||||
|
||||
[interop]
|
||||
enabled=false
|
||||
appendWindowsPath=false
|
||||
|
||||
[user]
|
||||
default=openclaw
|
||||
|
||||
[time]
|
||||
useWindowsTimezone=true
|
||||
```
|
||||
|
||||
Rationale (Craig confirmed all settings appropriate for an app-owned
|
||||
appliance):
|
||||
|
||||
- `systemd=true` — gateway is a systemd-managed service.
|
||||
- `automount.enabled=false` / `mountFsTab=false` — the gateway does not need
|
||||
Windows drive mounts.
|
||||
- `interop.enabled=false` / `appendWindowsPath=false` — the appliance does not
|
||||
shell out to Windows binaries.
|
||||
- `default=openclaw` — non-root default user; root only via explicit
|
||||
`wsl.exe -d OpenClawGateway -u root -- ...`.
|
||||
- `useWindowsTimezone=true` — gateway timestamps align with the user's
|
||||
Windows session.
|
||||
|
||||
Per Craig: no post-clone repairs needed (machine-id / DNS / timezone work as
|
||||
delivered by Ubuntu-24.04).
|
||||
|
||||
### `/etc/wsl-distribution.conf`
|
||||
|
||||
```ini
|
||||
[oobe]
|
||||
defaultName=OpenClawGateway
|
||||
|
||||
[shortcut]
|
||||
enabled=false
|
||||
|
||||
[terminal]
|
||||
enabled=false
|
||||
```
|
||||
|
||||
Rationale: the OpenClaw instance is an implementation detail; users should not
|
||||
see a Start menu shortcut or Windows Terminal profile for it. Craig confirmed
|
||||
this is the correct use of `wsl-distribution.conf` for a privately managed
|
||||
instance.
|
||||
|
||||
### Networking — loopback only
|
||||
|
||||
The gateway binds to **loopback inside WSL on port 18789**. The Windows tray
|
||||
connects via `http://localhost:18789` / `ws://localhost:18789`.
|
||||
|
||||
Per Craig: Windows localhost forwarding to a WSL2 service is a reliable core
|
||||
WSL promise. **No** WSL-IP fallback. **No** `lan` or `auto` bind. **No**
|
||||
`gateway.bind` overrides written by the tray. **No** Windows portproxy or
|
||||
firewall mutation.
|
||||
|
||||
The endpoint resolver and validation runner do not enumerate WSL interface
|
||||
addresses, do not run `hostname -I` / `ip -4 addr` / `ip route` / `ss -ltnp`
|
||||
inside WSL, and do not promote between bind modes. There is one Windows-side
|
||||
TCP listener snapshot of port 18789 plus a loopback `/health` probe.
|
||||
|
||||
Off-box / LAN / phone reachability is out of scope for this PR and will be
|
||||
handled separately when relay ownership and protocol are clear.
|
||||
|
||||
### Lifecycle and service ownership
|
||||
|
||||
- The gateway is started/managed via upstream OpenClaw CLI commands invoked
|
||||
through `wsl.exe -d OpenClawGateway -u root -- ...`.
|
||||
- `loginctl enable-linger openclaw` plus a tray-owned WSL keepalive
|
||||
(`wsl.exe -d OpenClawGateway -u openclaw -- sleep 2147483647`) keep the
|
||||
instance reachable while local-gateway mode is active. Both patterns are
|
||||
acceptable per Craig.
|
||||
- Repair primitive: `wsl.exe --terminate OpenClawGateway`. Global
|
||||
`wsl --shutdown` is **never** issued.
|
||||
- Removal: `wsl.exe --unregister OpenClawGateway` only (after explicit user
|
||||
confirmation), preceded by `wsl.exe --terminate OpenClawGateway`. Cleanup
|
||||
also removes the install-location directory.
|
||||
|
||||
Product readiness for the gateway requires all of:
|
||||
|
||||
1. service start/restart command returns;
|
||||
2. a listener for the WSL gateway exists on `:18789`, as observed from the Windows side;
|
||||
3. Windows-side `http://localhost:18789/health` probe succeeds;
|
||||
4. gateway status / RPC succeeds with the device token;
|
||||
5. setup-code mint succeeds.
|
||||
|
||||
`systemctl is-active` reporting `active` is not, on its own, treated as readiness.
|
||||
|
||||
### Diagnostics
|
||||
|
||||
On any setup failure, the engine and validation script surface the link
|
||||
<https://aka.ms/wsllogs> for the user/maintainer to collect WSL logs. The
|
||||
product does **not** scrape WSL internal log files or invoke
|
||||
`wsl --shutdown` to collect them. The validation script's
|
||||
`Save-DiagnosticsSnapshot` records `wslLogsHelp = https://aka.ms/wsllogs` and
|
||||
`Write-Summary` appends a "Diagnostics: see https://aka.ms/wsllogs..." note
|
||||
to `summary.md` on failure.
|
||||
|
||||
### Host filesystem and file I/O
|
||||
|
||||
All WSL file operations from Windows go through `wsl.exe -d OpenClawGateway
|
||||
-- ...` subprocess calls. `\\wsl$` and `\\wsl.localhost` are forbidden in
|
||||
product code, validation scripts, tests, and ad-hoc PowerShell. The instance
|
||||
does not depend on any Windows drive mount after setup.
|
||||
|
||||
### Pairing and protocol boundary
|
||||
|
||||
OpenClaw pairing is implemented entirely through the upstream OpenClaw
|
||||
protocol. The tray never edits gateway pairing stores directly.
|
||||
|
||||
1. Gateway starts with local token auth from
|
||||
`/var/lib/openclaw/gateway.env`.
|
||||
2. Tray invokes `wsl.exe -d OpenClawGateway -- openclaw qr --json` and
|
||||
decodes the upstream setup-code payload (with short-lived bootstrap
|
||||
token).
|
||||
3. Tray (operator) connects over WebSocket using its Ed25519 device identity
|
||||
and `auth.bootstrapToken`; gateway returns `hello-ok.auth.deviceToken`,
|
||||
stored in `%APPDATA%\OpenClawTray\device-key-ed25519.json` (operator
|
||||
token field).
|
||||
4. Tray (node) opens a separate WebSocket session with role `node` and
|
||||
pairs through the same setup-code/bootstrap-token flow; the resulting
|
||||
device token is stored in the same identity file under the **node**
|
||||
field.
|
||||
5. Subsequent reconnects use `auth.deviceToken`. Node tokens are never
|
||||
reused as `auth.token` and vice versa.
|
||||
|
||||
Identity-path invariant: operator and node device tokens share
|
||||
`%APPDATA%\OpenClawTray\device-key-ed25519.json` (`OPENCLAW_TRAY_APPDATA_DIR`
|
||||
override honored), with role distinction inside the file. The
|
||||
prototype-era split between `%APPDATA%` (operator) and `%LOCALAPPDATA%`
|
||||
(node) was closed in Phase 4.
|
||||
|
||||
The Windows tray node parallels the macOS app's in-app node model
|
||||
(`MacNodeModeCoordinator` with role `node`, separate session, capabilities
|
||||
declared). No WSL-internal worker is paired by the Windows tray in this PR.
|
||||
|
||||
## Validation
|
||||
|
||||
`scripts/validate-wsl-gateway.ps1` provides four scenarios. Each writes a
|
||||
JSON+markdown summary under `artifacts/validate-wsl-gateway/<run-id>/`.
|
||||
|
||||
Validation AppData isolation uses this canonical contract:
|
||||
|
||||
- `OPENCLAW_TRAY_DATA_DIR` is the settings/logs/run-marker root consumed by
|
||||
`SettingsManager`, `App.DataPath`, `Logger`, and token path resolution.
|
||||
- `OPENCLAW_TRAY_APPDATA_DIR` is the roaming identity-store root consumed by
|
||||
`DeviceIdentity`/pairing paths. Validation sets it alongside
|
||||
`OPENCLAW_TRAY_DATA_DIR` for backward compatibility and identity isolation.
|
||||
- `OPENCLAW_TRAY_LOCALAPPDATA_DIR` is the local setup-state/WSL-install root.
|
||||
|
||||
| Scenario | What it does | When to use | Destructive |
|
||||
|---|---|---|---|
|
||||
| `PreflightOnly` | Repo-layout sanity, WSL host status (`wsl --status`, `wsl --list --verbose`), relay-prototype probe (NotAvailable when no probe URI). No build, no install, no WSL state mutation. | Cheap CI / local sanity check. Safe on dev box. | No |
|
||||
| `UpstreamInstall` | Build + tests, then drives the tray onboarding so the product itself runs the canonical `wsl --install Ubuntu-24.04 --name OpenClawGateway --location <path> --no-launch --version 2` path. Smoke + bootstrap-token + operator+node pairing proofs over loopback. Reuses an existing `OpenClawGateway` instance if present. | Lab / dedicated machine. End-to-end product path. | Reuses existing distro state |
|
||||
| `FreshMachine` | `UpstreamInstall` after a fresh-machine reset: `wsl --unregister OpenClawGateway` + AppData wipe (single shot). | Lab. Fresh install proof. | Yes, scoped to `OpenClawGateway` |
|
||||
| `Recreate` | Iterated `FreshMachine`. Supports `-Iterations`. Uses `wsl --unregister` only — **never** `wsl --shutdown`. | Lab / repeatability harness. | Yes, scoped to `OpenClawGateway` |
|
||||
|
||||
Scenarios deliberately removed from the prototype: `BuildRootfs`,
|
||||
`InstallOnly`, `Smoke`, `Full`, `Loop`. Parameters deliberately removed:
|
||||
`-BuildDevRootfs`, `-BaseRootfsPath`, `-GatewayPackagePath`,
|
||||
`-UseExistingManifest`, `-RootfsPath`, `-AllowUnsignedDevArtifact`,
|
||||
`-SigningKeyId`, `-PublicKeyPath`,
|
||||
`-AllowNonStandardDistroNameForDestructiveClean`, `-NetworkingMode`,
|
||||
`-LoopMode`, `-RequireWorkerPairing`, `-CleanOpenClawState`,
|
||||
`-GoSkillProofCommand`, `-RequireGoSkillProof`.
|
||||
|
||||
The validation script:
|
||||
|
||||
- Drives onboarding via the `SetupWarningPage` "Set up locally" button
|
||||
(`OnboardingSetupLocal` automation ID); `LocalSetupProgressPage` autostarts
|
||||
the engine on appearance.
|
||||
- Polls `setup-state.json` for `Complete` (terminal status). Worker / rootfs
|
||||
phases are gone; terminal status is `Complete` only.
|
||||
- Snapshots loopback diagnostics on failure (Windows-side `:18789` listener
|
||||
state; loopback `/health` probe). Does **not** run any networking probes
|
||||
inside WSL.
|
||||
- Redacts sensitive output: `Redact-SensitiveGatewayOutput` over
|
||||
`openclaw qr --json` stdout, `Save-RedactedSettings` strips `Token`,
|
||||
`GatewayToken`, `BootstrapToken`, `bootstrap_token`, `NodeToken`,
|
||||
`nodeToken`; relay probe body strips `token=...`.
|
||||
|
||||
Scope guarantees from the validation script:
|
||||
|
||||
- Only `OpenClawGateway` is ever the target of `wsl --unregister`.
|
||||
- Global `wsl --shutdown` is never issued.
|
||||
- No `\\wsl$` or `\\wsl.localhost` paths are read or written.
|
||||
|
||||
Companion script:
|
||||
`scripts/reset-openclaw-wsl-validation-state.ps1` — exact-target gated
|
||||
cleanup for `OpenClawGateway` plus the `%APPDATA%\OpenClawTray` and
|
||||
`%LOCALAPPDATA%\OpenClawTray` directories. Refuses to act on any other distro
|
||||
name.
|
||||
|
||||
## Outstanding follow-ups
|
||||
|
||||
Tracked but outside the scope of this PR:
|
||||
|
||||
- Off-box / LAN / phone reachability via OpenClaw relay (blocked on relay
|
||||
ownership / protocol clarity).
|
||||
- Optional `winget install Microsoft.WSL` as a **platform** repair fallback
|
||||
(deeper research in flight). Distro creation stays on `wsl --install`
|
||||
regardless.
|
||||
- Internationalization of the onboarding copy (`Onboarding_SetupWarning_*`
|
||||
/ `Onboarding_LocalSetupProgress_*` resw entries across the supported
|
||||
locales).
|
||||
|
||||
See `docs/wsl-owner-open-issues.md` for the structured Q&A explaining **why**
|
||||
this design is what it is, with Craig's verbatim answers.
|
||||
6
global.json
Normal file
6
global.json
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"sdk": {
|
||||
"version": "10.0.100",
|
||||
"rollForward": "latestFeature"
|
||||
}
|
||||
}
|
||||
@ -1,13 +0,0 @@
|
||||
<Solution>
|
||||
<Folder Name="/src/">
|
||||
<Project Path="src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj">
|
||||
<Platform Project="x64" />
|
||||
</Project>
|
||||
<Project Path="src/OpenClaw.Shared/OpenClaw.Shared.csproj" />
|
||||
<Project Path="src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/">
|
||||
<Project Path="tests/OpenClaw.Shared.Tests/OpenClaw.Shared.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.Tests/OpenClaw.Tray.Tests.csproj" />
|
||||
</Folder>
|
||||
</Solution>
|
||||
38
openclaw-windows-node.slnx
Normal file
38
openclaw-windows-node.slnx
Normal file
@ -0,0 +1,38 @@
|
||||
<Solution>
|
||||
<Configurations>
|
||||
<Platform Name="Any CPU" />
|
||||
<Platform Name="x64" />
|
||||
<Platform Name="ARM64" />
|
||||
<BuildType Name="Debug" />
|
||||
<BuildType Name="Release" />
|
||||
</Configurations>
|
||||
<Folder Name="/src/">
|
||||
<Project Path="src/OpenClaw.Cli/OpenClaw.Cli.csproj" />
|
||||
<Project Path="src/OpenClaw.WinNode.Cli/OpenClaw.WinNode.Cli.csproj" />
|
||||
<Project Path="src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj">
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
<Project Path="src/OpenClaw.Shared/OpenClaw.Shared.csproj" />
|
||||
<Project Path="src/OpenClawTray.FunctionalUI/OpenClawTray.FunctionalUI.csproj" />
|
||||
<Project Path="src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj">
|
||||
<!-- WindowsAppSDK.SelfContained requires a concrete Platform (x64/ARM64); AnyCPU would need a RID. -->
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
<Folder Name="/tests/">
|
||||
<Project Path="tests/OpenClaw.Shared.Tests/OpenClaw.Shared.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.WinNode.Cli.Tests/OpenClaw.WinNode.Cli.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.Tests/OpenClaw.Tray.Tests.csproj" />
|
||||
<Project Path="tests/OpenClawTray.FunctionalUI.Tests/OpenClawTray.FunctionalUI.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.IntegrationTests/OpenClaw.Tray.IntegrationTests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.UITests/OpenClaw.Tray.UITests.csproj">
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
</Solution>
|
||||
326
scripts/dev-reset-rebuild-launch.ps1
Normal file
326
scripts/dev-reset-rebuild-launch.ps1
Normal file
@ -0,0 +1,326 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Dev-loop helper: kill → backup/wipe state → optionally wipe WSL distro → build x64 → (optionally) launch tray.
|
||||
|
||||
.DESCRIPTION
|
||||
Consolidates the full dev-reset cycle used during OpenClaw tray development.
|
||||
Idempotent: no error if nothing is running, state dirs are absent, or the WSL
|
||||
distro is not registered.
|
||||
|
||||
Process kills are always by PID (Stop-Process -Id). Name-based kills are
|
||||
forbidden in this repo.
|
||||
|
||||
WSL file operations use 'wsl bash -c' — never \\wsl$\ paths (which trigger
|
||||
Windows permission prompts via the 9P protocol).
|
||||
|
||||
.PARAMETER WipeWslDistro
|
||||
Also unregister the OpenClawGateway WSL distro (wsl --unregister).
|
||||
Default: off (preserve the distro).
|
||||
|
||||
.PARAMETER CaptureDir
|
||||
If set, exports OPENCLAW_VISUAL_TEST=1 and OPENCLAW_VISUAL_TEST_DIR=<path>
|
||||
before launching the tray so the app auto-captures screenshots.
|
||||
|
||||
.PARAMETER SkipBuild
|
||||
Skip the 'dotnet build' step. Useful when you have just built.
|
||||
|
||||
.PARAMETER DontLaunch
|
||||
Reset and (optionally) build, but do not launch the tray.
|
||||
|
||||
.PARAMETER WorktreePath
|
||||
Root of the git worktree to operate in.
|
||||
Default: result of 'git rev-parse --show-toplevel' in the current directory.
|
||||
|
||||
.PARAMETER NoBackup
|
||||
Instead of backing up state dirs to TEMP, delete them directly.
|
||||
Faster, but no rollback.
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1
|
||||
Standard reset + rebuild + launch (no WSL wipe, no capture).
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -WipeWslDistro
|
||||
Full clean slate: also unregister the OpenClawGateway WSL distro.
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -DontLaunch
|
||||
Reset + build only (useful before testing manually).
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -CaptureDir .\visual-test-output\my-test
|
||||
Reset + build + launch with OPENCLAW_VISUAL_TEST capture enabled.
|
||||
#>
|
||||
|
||||
[CmdletBinding(SupportsShouldProcess)]
|
||||
param(
|
||||
[switch]$WipeWslDistro,
|
||||
[string]$CaptureDir = "",
|
||||
[switch]$SkipBuild,
|
||||
[switch]$DontLaunch,
|
||||
[string]$WorktreePath = "",
|
||||
[switch]$NoBackup
|
||||
)
|
||||
|
||||
Set-StrictMode -Version Latest
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
# ─── Resolve worktree path ────────────────────────────────────────────────────
|
||||
|
||||
if ([string]::IsNullOrWhiteSpace($WorktreePath)) {
|
||||
$gitTop = & git rev-parse --show-toplevel 2>$null
|
||||
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($gitTop)) {
|
||||
Write-Error "Cannot resolve worktree path: not inside a git repository and -WorktreePath was not supplied."
|
||||
exit 1
|
||||
}
|
||||
$WorktreePath = $gitTop.Trim()
|
||||
}
|
||||
$WorktreePath = (Resolve-Path -LiteralPath $WorktreePath).Path
|
||||
|
||||
# ─── Constants ────────────────────────────────────────────────────────────────
|
||||
|
||||
$DistroName = "OpenClawGateway"
|
||||
$TrayProject = Join-Path $WorktreePath "src\OpenClaw.Tray.WinUI\OpenClaw.Tray.WinUI.csproj"
|
||||
$AppDataDir = Join-Path $env:APPDATA "OpenClawTray"
|
||||
$LocalAppDataDir = Join-Path $env:LOCALAPPDATA "OpenClawTray"
|
||||
$timestamp = (Get-Date).ToString("yyyy-MM-ddTHH-mm-ss")
|
||||
$BackupRoot = Join-Path $env:TEMP "openclaw-test-backup-$timestamp"
|
||||
|
||||
# ─── Summary state ────────────────────────────────────────────────────────────
|
||||
|
||||
$summary = [ordered]@{
|
||||
backupPath = $null
|
||||
distroState = "not-checked"
|
||||
buildResult = "skipped"
|
||||
launchPid = $null
|
||||
}
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Prints a single progress line to the host: a status glyph followed by the message.
function Write-Step {
    param(
        [string]$Icon,     # status glyph printed before the message
        [string]$Message   # text describing the step outcome
    )

    $line = " {0} {1}" -f $Icon, $Message
    Write-Host $line
}
|
||||
# Outcome-specific wrappers around Write-Step; each one only fixes the glyph.

# Reports a step that completed successfully.
function Write-OK {
    param([string]$m)
    Write-Step -Icon "✓" -Message $m
}

# Reports a step that was intentionally skipped (nothing to do, or WhatIf).
function Write-Skip {
    param([string]$m)
    Write-Step -Icon "-" -Message $m
}

# Reports a step that failed.
function Write-Fail {
    param([string]$m)
    Write-Step -Icon "x" -Message $m
}
|
||||
|
||||
# Returns all running processes whose name matches "OpenClaw*".
# The result is always wrapped in an array, so callers can rely on .Count
# even when zero or one process matches.
function Get-OpenClawProcesses {
    $namePattern = "OpenClaw*"
    @(
        Get-Process -ErrorAction SilentlyContinue |
            Where-Object { $_.ProcessName -like $namePattern }
    )
}
|
||||
|
||||
# Lists the names of registered WSL distros via 'wsl.exe --list --quiet'.
#
# Returns an empty list when:
#   - wsl.exe cannot be resolved as an application (WSL not installed), or
#   - wsl.exe exits non-zero or produces no output.
# Callers should wrap the call in @(...) to get a stable array.
function Get-WslDistros {
    # Without this guard, '& wsl.exe' raises CommandNotFoundException on a
    # machine without WSL and aborts the whole script, even when the caller
    # never asked to touch the distro.
    $wslCommand = Get-Command -Name "wsl.exe" -CommandType Application -ErrorAction SilentlyContinue
    if (-not $wslCommand) { return @() }

    $out = & wsl.exe --list --quiet 2>$null
    if ($LASTEXITCODE -ne 0 -or $null -eq $out) { return @() }

    # Strip embedded NUL characters and surrounding whitespace, then drop
    # empty lines. NOTE(review): the NULs presumably come from wsl.exe
    # emitting UTF-16 text -- confirm.
    @($out | ForEach-Object { ($_ -replace "`0", "").Trim() } | Where-Object { $_ })
}
|
||||
|
||||
# ─── Banner ───────────────────────────────────────────────────────────────────
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "============================================================"
|
||||
Write-Host " OpenClaw Dev Loop -- Reset / Rebuild / Launch"
|
||||
Write-Host "============================================================"
|
||||
Write-Host " Timestamp : $timestamp"
|
||||
Write-Host " WorktreePath : $WorktreePath"
|
||||
Write-Host " WipeWslDistro: $WipeWslDistro SkipBuild: $SkipBuild DontLaunch: $DontLaunch"
|
||||
Write-Host " NoBackup : $NoBackup CaptureDir: $(if ($CaptureDir) { $CaptureDir } else { '(none)' })"
|
||||
if ($WhatIfPreference) {
|
||||
Write-Host " *** WHATIF MODE -- no state will be changed ***"
|
||||
}
|
||||
Write-Host ""
|
||||
|
||||
# =============================================================================
|
||||
# STEP 1 -- Kill OpenClaw* processes (by PID; name-based kills are forbidden)
|
||||
# =============================================================================
|
||||
|
||||
Write-Host "STEP 1: Kill OpenClaw* processes"
$procs = @(Get-OpenClawProcesses)

if ($procs.Count -eq 0) {
    Write-Skip "No OpenClaw* processes running"
}
else {
    foreach ($p in $procs) {
        if ($PSCmdlet.ShouldProcess("PID $($p.Id) ($($p.ProcessName))", "Stop-Process -Id")) {
            try {
                # Kill strictly by PID; name-based kills are forbidden in this repo.
                Stop-Process -Id $p.Id -Force
                Write-OK "Stopped PID $($p.Id) ($($p.ProcessName))"
            }
            catch {
                $stopError = $_
                # A process can exit on its own between enumeration and the
                # kill (e.g. a helper that dies with the tray). That still
                # leaves the desired end state, so it must not abort the run.
                $stillAlive = Get-Process -Id $p.Id -ErrorAction SilentlyContinue
                if (-not $stillAlive) {
                    Write-Skip "PID $($p.Id) ($($p.ProcessName)) already exited"
                }
                else {
                    Write-Fail "Failed to stop PID $($p.Id): $stopError"
                    exit 1
                }
            }
        }
        else {
            Write-Skip "WhatIf: would stop PID $($p.Id) ($($p.ProcessName))"
        }
    }
    if (-not $WhatIfPreference) {
        Start-Sleep -Milliseconds 500 # brief pause for file-lock release
    }
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 2 -- Backup or wipe tray state dirs
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "STEP 2: $(if ($NoBackup) { 'Wipe' } else { 'Backup' }) tray state dirs"
|
||||
|
||||
# Resets one tray state directory.
#
#   -Path   directory to reset
#   -Label  name used in console output and as the backup sub-folder name
#
# Reads $NoBackup, $BackupRoot and $PSCmdlet from the enclosing script scope.
# With -NoBackup the directory is deleted outright; otherwise it is copied to
# "$BackupRoot\$Label" and then removed. Both paths honor ShouldProcess, and
# the backup path records its destination in $script:summary.backupPath.
function Invoke-StateDirReset {
    param([string]$Path, [string]$Label)

    $present = Test-Path -LiteralPath $Path
    if (-not $present) {
        Write-Skip "$Label not present -- nothing to do"
        return
    }

    if (-not $NoBackup) {
        # Backup mode: copy first, then remove the source.
        $dest = Join-Path $BackupRoot $Label
        $approved = $PSCmdlet.ShouldProcess($Path, "Copy-Item to backup then Remove-Item")

        if (-not $approved) {
            Write-Skip "WhatIf: would backup $Label --> $dest, then remove source"
            $script:summary.backupPath = "(whatif) $BackupRoot"
            return
        }

        New-Item -ItemType Directory -Force -Path $BackupRoot | Out-Null
        Copy-Item -LiteralPath $Path -Destination $dest -Recurse -Force
        Remove-Item -LiteralPath $Path -Recurse -Force
        Write-OK "Backed up $Label --> $dest"
        $script:summary.backupPath = $BackupRoot
        return
    }

    # Direct-delete mode: no rollback is possible.
    $approved = $PSCmdlet.ShouldProcess($Path, "Remove-Item -Recurse -Force")
    if ($approved) {
        Remove-Item -LiteralPath $Path -Recurse -Force
        Write-OK "Deleted $Label ($Path)"
    }
    else {
        Write-Skip "WhatIf: would delete $Label ($Path)"
    }
}
|
||||
|
||||
Invoke-StateDirReset -Path $AppDataDir -Label "AppData_OpenClawTray"
|
||||
Invoke-StateDirReset -Path $LocalAppDataDir -Label "LocalAppData_OpenClawTray"
|
||||
|
||||
# =============================================================================
|
||||
# STEP 3 -- Optionally wipe the WSL distro
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 3: WSL distro ($DistroName)"

# Snapshot the registered distros once; every branch below uses the same view.
$registeredDistros = @(Get-WslDistros)
$distroExists = $registeredDistros -contains $DistroName

if ($WipeWslDistro -and $distroExists) {
    $approved = $PSCmdlet.ShouldProcess($DistroName, "wsl --terminate then wsl --unregister")
    if (-not $approved) {
        Write-Skip "WhatIf: would terminate + unregister WSL distro $DistroName"
        $summary.distroState = "(whatif) would-unregister"
    }
    else {
        & wsl.exe --terminate $DistroName 2>$null # ignore exit code -- distro may already be stopped
        & wsl.exe --unregister $DistroName
        if ($LASTEXITCODE -ne 0) {
            Write-Fail "wsl --unregister $DistroName failed (exit $LASTEXITCODE)"
            exit 1
        }
        Write-OK "Unregistered WSL distro $DistroName"
        $summary.distroState = "unregistered"
    }
}
elseif ($WipeWslDistro) {
    # A wipe was requested but there is nothing registered under that name.
    Write-Skip "$DistroName is not registered -- nothing to unregister"
    $summary.distroState = "absent"
}
else {
    # Default: leave the distro alone and just record whether it exists.
    Write-Skip "-WipeWslDistro not set -- preserving $DistroName"
    $summary.distroState = if ($distroExists) { "preserved" } else { "absent" }
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 4 -- Build x64 tray
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 4: Build x64 tray"

if (-not $SkipBuild) {
    $projectPresent = Test-Path -LiteralPath $TrayProject
    if (-not $projectPresent) {
        Write-Fail "Tray project not found: $TrayProject"
        exit 1
    }

    # Human-readable form of the command, shared by the verbose and WhatIf messages.
    $buildCommandText = "dotnet build `"$TrayProject`" -p:Platform=x64 --no-restore -v q"

    if (-not $PSCmdlet.ShouldProcess($TrayProject, "dotnet build -p:Platform=x64 --no-restore -v q")) {
        Write-Skip "WhatIf: would run: $buildCommandText"
        $summary.buildResult = "(whatif) would-build"
    }
    else {
        Write-Verbose "Running: $buildCommandText"
        & dotnet build $TrayProject -p:Platform=x64 --no-restore -v q
        if ($LASTEXITCODE -ne 0) {
            Write-Fail "dotnet build failed (exit $LASTEXITCODE)"
            $summary.buildResult = "failed"
            exit 1
        }
        Write-OK "Build succeeded"
        $summary.buildResult = "succeeded"
    }
}
else {
    Write-Skip "-SkipBuild set -- skipping dotnet build"
    $summary.buildResult = "skipped"
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 5 -- Launch tray
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 5: Launch tray"

if ($DontLaunch) {
    Write-Skip "-DontLaunch set -- not launching"
}
else {
    if ($PSCmdlet.ShouldProcess($TrayProject, "dotnet run -p:Platform=x64")) {
        if ($CaptureDir) {
            # A relative capture directory is resolved against the worktree root.
            $captureAbs = if ([System.IO.Path]::IsPathRooted($CaptureDir)) {
                $CaptureDir
            }
            else {
                Join-Path $WorktreePath $CaptureDir
            }
            # Set in this session's environment so the launched process inherits them.
            $env:OPENCLAW_VISUAL_TEST = "1"
            $env:OPENCLAW_VISUAL_TEST_DIR = $captureAbs
            Write-Verbose "Set OPENCLAW_VISUAL_TEST=1 OPENCLAW_VISUAL_TEST_DIR=$captureAbs"
        }

        Write-Verbose "Launching: dotnet run --project `"$TrayProject`" -p:Platform=x64"

        # Start-Process joins -ArgumentList elements with spaces and does not
        # quote them, so the project path must carry its own quotes or a
        # worktree path containing a space would be split into two arguments.
        $launchArgs = @("run", "--project", "`"$TrayProject`"", "-p:Platform=x64")
        $launchProc = Start-Process -FilePath "dotnet" `
            -ArgumentList $launchArgs `
            -PassThru -WorkingDirectory $WorktreePath
        $summary.launchPid = $launchProc.Id
        Write-OK "Tray launched (PID $($launchProc.Id))"
    }
    else {
        Write-Skip "WhatIf: would launch: dotnet run --project `"$TrayProject`" -p:Platform=x64"
        if ($CaptureDir) {
            Write-Skip "WhatIf: would also set OPENCLAW_VISUAL_TEST=1 and OPENCLAW_VISUAL_TEST_DIR=$CaptureDir"
        }
    }
}
|
||||
|
||||
# =============================================================================
|
||||
# Summary
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "---------------------------- Summary ----------------------------"
|
||||
Write-Host " Backup path : $(if ($summary.backupPath) { $summary.backupPath } elseif ($NoBackup) { '(deleted directly)' } else { '(nothing backed up)' })"
|
||||
Write-Host " Distro state : $($summary.distroState)"
|
||||
Write-Host " Build result : $($summary.buildResult)"
|
||||
Write-Host " Launch PID : $(if ($summary.launchPid) { $summary.launchPid } else { '(not launched)' })"
|
||||
Write-Host "-----------------------------------------------------------------"
|
||||
Write-Host ""
|
||||
388
scripts/reset-openclaw-wsl-validation-state.ps1
Normal file
388
scripts/reset-openclaw-wsl-validation-state.ps1
Normal file
@ -0,0 +1,388 @@
|
||||
# reset-openclaw-wsl-validation-state.ps1
|
||||
#
|
||||
# Exact-target destructive cleanup for OpenClaw-owned WSL validation state.
|
||||
#
|
||||
# Safety guarantees enforced by this script:
|
||||
# 1. Without -ConfirmDestructiveClean, the script runs in DRY-RUN mode and
|
||||
# reports what it WOULD do; it never mutates OpenClaw state (report artifacts are still written under -OutputDir).
|
||||
# 2. The only WSL distro this script will ever touch is the production
|
||||
# constant "OpenClawGateway". Any other distro name is rejected.
|
||||
# 3. Destructive operations are preceded by a copy of the user's
|
||||
# %APPDATA%\OpenClawTray and %LOCALAPPDATA%\OpenClawTray identity
|
||||
# directories to a timestamped backup location (printed to console).
|
||||
# 4. The script never calls `wsl --shutdown`. It uses
|
||||
# `wsl --terminate OpenClawGateway` only.
|
||||
# 5. The script never reads or writes \\wsl$ / \\wsl.localhost paths.
|
||||
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[string]$OutputDir = (Join-Path (Get-Location) "artifacts\wsl-gateway-validation\reset"),
|
||||
[string]$BackupRoot,
|
||||
[string]$AppDataRoot,
|
||||
[string]$LocalAppDataRoot,
|
||||
[string]$InstallLocation,
|
||||
[switch]$CleanInstallLocation,
|
||||
[switch]$ConfirmDestructiveClean,
|
||||
[switch]$KeepRunningProcesses,
|
||||
[switch]$PassThruJson
|
||||
)
|
||||
|
||||
Set-StrictMode -Version Latest
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
# Production-locked WSL distro name (Phase 3 constant). This script will
|
||||
# refuse to act on any other distro; a -DistroName override parameter is
|
||||
# intentionally not provided.
|
||||
$script:OpenClawDistroName = "OpenClawGateway"
|
||||
|
||||
$startedAt = Get-Date
|
||||
$timestamp = $startedAt.ToString("yyyyMMddHHmmss")
|
||||
|
||||
if ([string]::IsNullOrWhiteSpace($BackupRoot)) {
|
||||
$BackupRoot = Join-Path (Get-Location) "artifacts\reset-backups\$timestamp"
|
||||
}
|
||||
|
||||
$result = [ordered]@{
|
||||
script = "reset-openclaw-wsl-validation-state"
|
||||
startedAt = $startedAt.ToString("o")
|
||||
finishedAt = $null
|
||||
outputDir = $OutputDir
|
||||
backupRoot = $BackupRoot
|
||||
distroName = $script:OpenClawDistroName
|
||||
installLocation = $InstallLocation
|
||||
appDataRoot = $AppDataRoot
|
||||
localAppDataRoot = $LocalAppDataRoot
|
||||
destructiveConfirmed = [bool]$ConfirmDestructiveClean
|
||||
dryRun = -not $ConfirmDestructiveClean
|
||||
targets = [ordered]@{}
|
||||
steps = @()
|
||||
}
|
||||
|
||||
# Appends one step record to the script-level $result.steps list.
#
#   -Name     step identifier
#   -Status   outcome label supplied by the caller
#   -Message  human-readable description
#   -Data     optional extra details stored alongside the step
#
# Each record is stamped with the current time in round-trip ("o") format.
function Add-ResetStep {
    param(
        [string]$Name,
        [string]$Status,
        [string]$Message,
        [hashtable]$Data = @{}
    )

    $stepRecord = [ordered]@{
        name      = $Name
        status    = $Status
        message   = $Message
        data      = $Data
        timestamp = (Get-Date).ToString("o")
    }

    $script:result.steps += $stepRecord
}
|
||||
|
||||
# Runs a command with stdout and stderr captured to files under
# "$OutputDir\commands", records the outcome as a reset step, and throws on
# a non-zero exit code unless -IgnoreExitCode is set.
#
#   -Name              step name; also used (sanitized) for the capture file names
#   -FilePath          command to invoke
#   -ArgumentList      arguments splatted to the command
#   -WorkingDirectory  location the command runs in (defaults to the current one)
#   -IgnoreExitCode    record the exit code but never throw because of it
function Invoke-CapturedCommand {
    param(
        [string]$Name,
        [string]$FilePath,
        [string[]]$ArgumentList,
        [string]$WorkingDirectory = (Get-Location).Path,
        [switch]$IgnoreExitCode
    )

    $stepDir = Join-Path $OutputDir "commands"
    New-Item -ItemType Directory -Force -Path $stepDir | Out-Null

    # Anything outside [a-zA-Z0-9_.-] is replaced so the step name is a safe file name.
    $safeName = $Name -replace "[^a-zA-Z0-9_.-]", "-"
    $stdout = Join-Path $stepDir "$safeName.stdout.txt"
    $stderr = Join-Path $stepDir "$safeName.stderr.txt"

    Push-Location $WorkingDirectory
    try {
        # Clear any exit code left behind by an earlier native command so a
        # command that does not set $LASTEXITCODE is not blamed for it.
        $global:LASTEXITCODE = 0
        & $FilePath @ArgumentList > $stdout 2> $stderr
        $exitCode = if ($null -eq $global:LASTEXITCODE) { 0 } else { $global:LASTEXITCODE }
    }
    finally {
        Pop-Location
    }

    # The step is recorded before the exit-code check so the capture paths
    # are in the report even when the call below throws.
    Add-ResetStep $Name "Completed" "Command completed with exit code $exitCode." @{
        file      = $FilePath
        arguments = ($ArgumentList -join " ")
        exitCode  = $exitCode
        stdout    = $stdout
        stderr    = $stderr
    }

    if ($exitCode -ne 0 -and -not $IgnoreExitCode) {
        throw "$Name failed with exit code $exitCode. See $stdout and $stderr."
    }
}
|
||||
|
||||
# Copies a directory into $BackupRoot and then removes the original.
#
#   -Path   directory to back up and remove
#   -Label  prefix for the backup folder name and for the recorded step name
#
# Records a "Skipped" step when -Path does not exist and a "DryRun" step when
# $result.dryRun is set. Dry-run performs no file-system writes.
function Backup-Directory {
    param(
        [string]$Path,
        [string]$Label
    )

    if (-not (Test-Path -LiteralPath $Path)) {
        Add-ResetStep "backup-$Label" "Skipped" "$Path does not exist."
        return
    }

    $leaf = Split-Path -Leaf $Path
    $destination = Join-Path $BackupRoot "$Label-$leaf"

    if ($result.dryRun) {
        Add-ResetStep "backup-$Label" "DryRun" "Would copy $Path to $destination, then remove the original." @{
            source      = $Path
            destination = $destination
        }
        return
    }

    # Create the backup root only once a real backup is about to happen, so
    # dry-run leaves nothing behind on disk.
    New-Item -ItemType Directory -Force -Path $BackupRoot | Out-Null

    # Do not merge into an existing backup folder; pick a timestamped name instead.
    if (Test-Path -LiteralPath $destination) {
        $destination = Join-Path $BackupRoot ("{0}-{1:yyyyMMddHHmmss}" -f "$Label-$leaf", (Get-Date))
    }

    # Copy first so the user can recover even if removal fails partway.
    Copy-Item -LiteralPath $Path -Destination $destination -Recurse -Force
    Remove-Item -LiteralPath $Path -Recurse -Force
    Add-ResetStep "backup-$Label" "Completed" "Backed up $Path to $destination, then removed the original." @{
        source      = $Path
        destination = $destination
    }
}
|
||||
|
||||
# Safety gate: throws unless $script:OpenClawDistroName is exactly the
# production distro name "OpenClawGateway". There is no override flag, and
# the check applies in dry-run mode as well.
function Assert-DestructiveTargetIsAllowed {
    $targetIsLocked = ($script:OpenClawDistroName -eq "OpenClawGateway")
    if ($targetIsLocked) {
        return
    }

    throw "Refusing to run: distro name is locked to 'OpenClawGateway' but resolved to '$($script:OpenClawDistroName)'."
}
|
||||
|
||||
# Captures the TCP connections bound to local port 18789, writes them to
# "$OutputDir\port-18789-<Label>.json", records a reset step, and returns them.
#
#   -Label  suffix for the snapshot file name and the step name (e.g. "after")
#
# The JSON file always contains an array: "[]" when nothing is bound, and a
# one-element array when exactly one connection is found.
function Get-PortOwnerSnapshot {
    param([string]$Label)

    $port = 18789
    try {
        $connections = @(Get-NetTCPConnection -LocalPort $port -ErrorAction Stop)
        $snapshot = @($connections | ForEach-Object {
            [ordered]@{
                localAddress  = $_.LocalAddress
                localPort     = $_.LocalPort
                state         = $_.State.ToString()
                owningProcess = $_.OwningProcess
            }
        })
    }
    catch {
        # Best effort: any failure of the query is treated as "nothing bound".
        # NOTE(review): presumably this also covers the "no matching
        # connection" error raised under -ErrorAction Stop -- confirm.
        $snapshot = @()
    }

    $snapshotPath = Join-Path $OutputDir "port-18789-$Label.json"

    # Pass the array through -InputObject rather than the pipeline. Piping
    # unrolls it, so an empty snapshot would emit no JSON at all and a single
    # entry would be written as an object instead of an array.
    ConvertTo-Json -InputObject @($snapshot) -Depth 5 | Set-Content -LiteralPath $snapshotPath -Encoding UTF8

    Add-ResetStep "port-snapshot-$Label" "Completed" "Captured TCP listener snapshot for port 18789." @{
        path       = $snapshotPath
        ownerCount = @($snapshot).Count
    }
    return $snapshot
}
|
||||
|
||||
# Lists the names of registered WSL distros via 'wsl.exe --list --quiet'.
# Returns an empty list when wsl.exe exits non-zero or produces no output.
function Get-WslDistros {
    $rawLines = & wsl.exe --list --quiet 2>$null

    $listingSucceeded = ($LASTEXITCODE -eq 0) -and ($null -ne $rawLines)
    if (-not $listingSucceeded) {
        return @()
    }

    # Strip embedded NUL characters and surrounding whitespace from every
    # line, then keep only the non-empty names.
    return @(
        foreach ($rawLine in $rawLines) {
            $distro = ($rawLine -replace "`0", "").Trim()
            if ($distro) { $distro }
        }
    )
}
|
||||
|
||||
# Returns the running processes whose name matches "OpenClaw*".
function Get-OpenClawProcesses {
    $matching = @(
        foreach ($proc in Get-Process) {
            if ($proc.ProcessName -like "OpenClaw*") { $proc }
        }
    )
    return $matching
}
|
||||
|
||||
# Stores a description of everything the reset would touch in
# $script:result.targets and records a "target-summary" step with the
# headline counts and existence flags.
#
#   -Processes            processes to list (pid, name, path)
#   -Distros              registered WSL distro names
#   -AppDataPath          tray AppData directory
#   -LocalAppDataPath     tray LocalAppData directory
#   -InstallLocationPath  distro install location (may be empty)
#   -PortOwners           port 18789 snapshot taken before the reset
function Add-TargetSummary {
    param(
        [object[]]$Processes,
        [string[]]$Distros,
        [string]$AppDataPath,
        [string]$LocalAppDataPath,
        [string]$InstallLocationPath,
        [object[]]$PortOwners
    )

    # Build the summary key by key, in the order it appears in the report.
    $targets = [ordered]@{}

    $targets["processes"] = @($Processes | ForEach-Object {
        [ordered]@{
            pid  = $_.Id
            name = $_.ProcessName
            path = $_.Path
        }
    })
    $targets["distroExists"] = ($Distros -contains $script:OpenClawDistroName)
    $targets["distroName"] = $script:OpenClawDistroName
    $targets["appDataPath"] = $AppDataPath
    $targets["appDataExists"] = Test-Path -LiteralPath $AppDataPath
    $targets["localAppDataPath"] = $LocalAppDataPath
    $targets["localAppDataExists"] = Test-Path -LiteralPath $LocalAppDataPath
    $targets["installLocationPath"] = $InstallLocationPath
    # A blank install location is never probed on disk.
    $targets["installLocationExists"] = (-not [string]::IsNullOrWhiteSpace($InstallLocationPath)) -and (Test-Path -LiteralPath $InstallLocationPath)
    $targets["installLocationCleanupRequested"] = [bool]$CleanInstallLocation
    $targets["port18789OwnersBefore"] = @($PortOwners)
    $targets["outputDir"] = $OutputDir
    $targets["backupRoot"] = $BackupRoot

    $script:result.targets = $targets

    $stepData = @{
        processCount          = @($Processes).Count
        distroExists          = [bool]$targets["distroExists"]
        appDataExists         = [bool]$targets["appDataExists"]
        localAppDataExists    = [bool]$targets["localAppDataExists"]
        installLocationExists = [bool]$targets["installLocationExists"]
    }
    Add-ResetStep "target-summary" "Completed" "Captured OpenClaw-owned reset targets." $stepData
}
|
||||
|
||||
# Verifies that the reset actually left the machine clean.
#
# Skipped (with a "postconditions" step) during dry-run. Otherwise throws on the first
# violated condition, checked in this order: leftover OpenClaw processes (unless
# -KeepRunningProcesses), distro still registered, AppData path present, LocalAppData
# path present, install location present (only when -CleanInstallLocation was requested).
# On success it captures `wsl --list --verbose` and the port owner snapshot, then logs
# a "postconditions" / "Passed" step.
function Assert-CleanPostCondition {
    param(
        [string]$AppDataPath,
        [string]$LocalAppDataPath,
        [string]$InstallLocationPath
    )

    if ($result.dryRun) {
        Add-ResetStep "postconditions" "Skipped" "Postconditions are skipped during dry-run."
        return
    }

    $remainingProcesses = @(Get-OpenClawProcesses)
    if (-not $KeepRunningProcesses -and $remainingProcesses.Count -gt 0) {
        $pidList = @($remainingProcesses | ForEach-Object { $_.Id }) -join ', '
        throw "OpenClaw processes are still running after reset: $pidList"
    }

    $remainingDistros = @(Get-WslDistros)
    if ($remainingDistros -contains $script:OpenClawDistroName) {
        throw "WSL distro '$($script:OpenClawDistroName)' is still registered after reset."
    }

    if (Test-Path -LiteralPath $AppDataPath) {
        throw "AppData path still exists after reset: $AppDataPath"
    }

    if (Test-Path -LiteralPath $LocalAppDataPath) {
        throw "LocalAppData path still exists after reset: $LocalAppDataPath"
    }

    $installLocationSupplied = -not [string]::IsNullOrWhiteSpace($InstallLocationPath)
    if ($CleanInstallLocation -and $installLocationSupplied -and (Test-Path -LiteralPath $InstallLocationPath)) {
        throw "Install location still exists after reset: $InstallLocationPath"
    }

    # Evidence for the run artifacts: distro list and port owners after the reset.
    $wslListAfterPath = Join-Path $OutputDir "wsl-list-after.txt"
    & wsl.exe --list --verbose > $wslListAfterPath 2>&1
    $script:result.targets.port18789OwnersAfter = @(Get-PortOwnerSnapshot -Label "after")

    $stepData = @{
        wslListAfter = $wslListAfterPath
    }
    Add-ResetStep "postconditions" "Passed" "OpenClaw-owned state reset postconditions passed." $stepData
}
|
||||
|
||||
# ---- Script entry point -------------------------------------------------------------
# The artifact directory must exist before any step output or summary is written to it.
New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null

try {
    Assert-DestructiveTargetIsAllowed

    # Default the data roots to the current user's profile when none were supplied.
    if ([string]::IsNullOrWhiteSpace($AppDataRoot)) {
        $AppDataRoot = $env:APPDATA
        $result.appDataRoot = $AppDataRoot
    }
    if ([string]::IsNullOrWhiteSpace($LocalAppDataRoot)) {
        $LocalAppDataRoot = $env:LOCALAPPDATA
        $result.localAppDataRoot = $LocalAppDataRoot
    }

    # Snapshot everything that is about to be touched.
    $appData = Join-Path $AppDataRoot "OpenClawTray"
    $localAppData = Join-Path $LocalAppDataRoot "OpenClawTray"
    $processes = @(Get-OpenClawProcesses)
    $distros = @(Get-WslDistros)
    $portOwnersBefore = @(Get-PortOwnerSnapshot -Label "before")
    Add-TargetSummary -Processes $processes -Distros $distros -AppDataPath $appData -LocalAppDataPath $localAppData -InstallLocationPath $InstallLocation -PortOwners $portOwnersBefore

    if ($result.dryRun) {
        Add-ResetStep "mode" "DryRun" "No state will be changed. Pass -ConfirmDestructiveClean to reset OpenClaw-owned state."
        Write-Host "DRY-RUN: pass -ConfirmDestructiveClean to actually reset OpenClaw-owned state."
    }
    else {
        Add-ResetStep "mode" "Confirmed" "OpenClaw-owned state reset is enabled for this run."
        Write-Host "Backups will be written under: $BackupRoot"
    }

    # --- Stop running OpenClaw processes (by PID only) ---
    $stopStepName = "stop-openclaw-processes"
    $stopStepData = @{
        pids = @($processes | ForEach-Object { $_.Id })
    }
    if ($processes.Count -eq 0) {
        Add-ResetStep $stopStepName "Skipped" "No OpenClaw processes are running."
    }
    elseif ($KeepRunningProcesses) {
        Add-ResetStep $stopStepName "Skipped" "Keeping running OpenClaw processes because -KeepRunningProcesses was set." $stopStepData
    }
    elseif ($result.dryRun) {
        Add-ResetStep $stopStepName "DryRun" "Would stop running OpenClaw processes by PID." $stopStepData
    }
    else {
        foreach ($process in $processes) {
            Stop-Process -Id $process.Id -Force
        }
        Add-ResetStep $stopStepName "Completed" "Stopped running OpenClaw processes by PID." $stopStepData
    }

    # --- Capture the distro list, then unregister only the OpenClaw distro ---
    $hasGatewayDistro = $distros -contains $script:OpenClawDistroName
    $wslListPath = Join-Path $OutputDir "wsl-list-before.txt"
    & wsl.exe --list --verbose > $wslListPath 2>&1
    Add-ResetStep "capture-wsl-list" "Completed" "Captured WSL distro list." @{ path = $wslListPath }

    $unregisterStepName = "unregister-$($script:OpenClawDistroName)"
    if (-not $hasGatewayDistro) {
        Add-ResetStep $unregisterStepName "Skipped" "WSL distro '$($script:OpenClawDistroName)' is not registered."
    }
    elseif ($result.dryRun) {
        Add-ResetStep $unregisterStepName "DryRun" "Would terminate and unregister only the '$($script:OpenClawDistroName)' WSL distro." @{ distroName = $script:OpenClawDistroName }
    }
    else {
        # Exact-target only: --terminate <name>, never --shutdown.
        Invoke-CapturedCommand "wsl-terminate-$($script:OpenClawDistroName)" "wsl.exe" @("--terminate", $script:OpenClawDistroName) -IgnoreExitCode
        Invoke-CapturedCommand "wsl-unregister-$($script:OpenClawDistroName)" "wsl.exe" @("--unregister", $script:OpenClawDistroName)
    }

    # --- Back up the data directories; the install location only on request ---
    Backup-Directory -Path $appData -Label "appdata"
    Backup-Directory -Path $localAppData -Label "localappdata"
    if (-not $CleanInstallLocation) {
        Add-ResetStep "backup-install-location" "Skipped" "Install location cleanup was not requested."
    }
    elseif ([string]::IsNullOrWhiteSpace($InstallLocation)) {
        Add-ResetStep "backup-install-location" "Skipped" "No install location was supplied."
    }
    else {
        Backup-Directory -Path $InstallLocation -Label "install-location"
    }

    Assert-CleanPostCondition -AppDataPath $appData -LocalAppDataPath $localAppData -InstallLocationPath $InstallLocation

    # --- Persist the run summary and report where it went ---
    $result.finishedAt = (Get-Date).ToString("o")
    $summaryPath = Join-Path $OutputDir "reset-summary.json"
    $result | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $summaryPath -Encoding UTF8

    if ($PassThruJson) {
        $result | ConvertTo-Json -Depth 10
    }
    else {
        Write-Host "Reset summary: $summaryPath"
        if (-not $result.dryRun) {
            Write-Host "Backup root: $BackupRoot"
        }
    }
}
catch {
    # Any failure still produces a summary file, with a final "reset" / "Failed" step.
    $result.finishedAt = (Get-Date).ToString("o")
    Add-ResetStep "reset" "Failed" $_.Exception.Message
    $summaryPath = Join-Path $OutputDir "reset-summary.json"
    $result | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $summaryPath -Encoding UTF8
    Write-Error $_.Exception.Message
    exit 1
}
|
||||
941
scripts/validate-wsl-gateway.ps1
Normal file
941
scripts/validate-wsl-gateway.ps1
Normal file
@ -0,0 +1,941 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Validate the OpenClaw WSL gateway local-setup product code path end-to-end.
|
||||
|
||||
.DESCRIPTION
|
||||
Phase 6 clean port. Drives the WinUI3 tray app from launch through the
|
||||
forked onboarding (SetupWarningPage -> "Set up locally" -> LocalSetupProgressPage)
|
||||
so the *product* code path that runs
|
||||
|
||||
wsl --install Ubuntu-24.04 --name OpenClawGateway --location <path> --no-launch --version 2
|
||||
|
||||
is exercised end-to-end. The script does NOT install WSL itself and does NOT
|
||||
invoke `wsl --install` directly: it expects the tray engine to do that and
|
||||
only verifies the postcondition.
|
||||
|
||||
Networking diagnostics are loopback-only. There is no WSL-IP / lan / auto
|
||||
fallback. Token / setup-code / private-key material is redacted in artifacts.
|
||||
|
||||
.PARAMETER Scenario
|
||||
PreflightOnly - Repo layout + WSL host status + relay probe (safe; no install).
|
||||
UpstreamInstall - Build/test, drive tray onboarding to install OpenClawGateway,
|
||||
run smoke + pairing proofs. Reuses an existing distro if present.
|
||||
FreshMachine - Like UpstreamInstall, but unregisters any existing
|
||||
OpenClawGateway distro first (simulates a clean machine).
|
||||
Recreate - Iterated FreshMachine (unregister between runs). Use `-Iterations`.
|
||||
|
||||
.NOTES
|
||||
Diagnostics on networking/lifecycle health failures point operators at
|
||||
https://aka.ms/wsllogs (per Craig).
|
||||
|
||||
File I/O against WSL is via `wsl bash -c` only. NEVER \\wsl$ / \\wsl.localhost.
|
||||
#>
|
||||
[CmdletBinding()]
param(
    # Which validation flow to run; see .PARAMETER Scenario in the help block.
    [ValidateSet("PreflightOnly", "UpstreamInstall", "FreshMachine", "Recreate")]
    [string]$Scenario = "PreflightOnly",

    # Parent folder for run artifacts; each run writes into a timestamped subfolder.
    [string]$OutputDir = (Join-Path (Get-Location) "artifacts\wsl-gateway-validation"),

    # Number of runs for the Recreate scenario.
    [int]$Iterations = 1,

    # Must be set for FreshMachine / Recreate (enforced by Assert-DestructiveSafety).
    [switch]$ConfirmDestructiveClean,

    [switch]$KeepFailedDistro,

    [bool]$CleanupAfterSuccess = $true,

    [switch]$ContinueOnCleanupFailure,

    # Skips the build and test steps in Invoke-RepositoryValidation.
    [switch]$NoBuild,

    # Upper bound, in seconds, for the tray setup to reach Complete.
    [int]$TimeoutSeconds = 600,

    # WSL distro to validate; destructive scenarios require an "OpenClawGateway" prefix.
    [string]$DistroName = "OpenClawGateway",

    # Initial gateway URL; replaced by the GatewayUrl from setup state once setup completes.
    [string]$GatewayUrl = "ws://127.0.0.1:18789",

    # Relay probe endpoint; falls back to OPENCLAW_RELAY_PROBE_URI when empty.
    [string]$RelayProbeUri,

    # Fail the run when no relay probe endpoint is configured.
    [switch]$RequireRelayProbe,

    [switch]$RequireRealGatewayBootstrap,

    [switch]$RequireOperatorPairing,

    [switch]$RequireWindowsNodePairing,

    [switch]$ContinueOnFailure
)
|
||||
|
||||
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# ---- Paths for this run -------------------------------------------------------------
$repoRoot = Resolve-Path (Join-Path $PSScriptRoot "..")
$runStamp = Get-Date -Format "yyyyMMdd-HHmmss"
$runRoot = Join-Path $OutputDir $runStamp
$commandsRoot = Join-Path $runRoot "commands"
$screenshotsRoot = Join-Path $runRoot "screenshots"
$summaryPath = Join-Path $runRoot "summary.json"
$summaryMarkdownPath = Join-Path $runRoot "summary.md"

# ---- Build outputs ------------------------------------------------------------------
$trayProject = Join-Path $repoRoot "src\OpenClaw.Tray.WinUI\OpenClaw.Tray.WinUI.csproj"
$runtimeIdentifier = if ($env:PROCESSOR_ARCHITECTURE -eq "ARM64") { "win-arm64" } else { "win-x64" }
$trayExe = Join-Path $repoRoot "src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\$runtimeIdentifier\OpenClaw.Tray.WinUI.exe"
$cliProject = Join-Path $repoRoot "src\OpenClaw.Cli\OpenClaw.Cli.csproj"

# Always isolate AppData under run root for non-Preflight scenarios so we never
# trample the operator's real Windows tray identity.
$isPreflightOnly = $Scenario -eq "PreflightOnly"
$validationAppDataRoot = if ($isPreflightOnly) { $env:APPDATA } else { Join-Path $runRoot "isolated\appdata" }
$validationLocalAppDataRoot = if ($isPreflightOnly) { $env:LOCALAPPDATA } else { Join-Path $runRoot "isolated\localappdata" }
$setupStatePath = Join-Path $validationLocalAppDataRoot "OpenClawTray\setup-state.json"
$settingsPath = Join-Path $validationAppDataRoot "settings.json"
$wslInstallLocation = Join-Path $runRoot "wsl\$DistroName"

# ---- Run summary, serialised to summary.json / summary.md by Write-Summary ----------
$script:summary = [ordered]@{
    script                = "validate-wsl-gateway"
    scenario              = $Scenario
    startedAt             = (Get-Date).ToString("o")
    finishedAt            = $null
    status                = "Running"
    validationStatus      = "Running"
    cleanupStatus         = "NotStarted"
    repository            = $repoRoot.Path
    outputDir             = $runRoot
    networkingMode        = "LocalhostOnly"
    activeDistroName      = $DistroName
    activeInstallLocation = $wslInstallLocation
    selectedGatewayUrl    = $GatewayUrl
    pairingValidation     = [ordered]@{
        gatewayImplementation        = "Unknown"
        bootstrapQrShape             = "Unknown"
        realUpstreamBootstrapHandoff = $false
        operatorPaired               = $false
        windowsNodePaired            = $false
    }
    setupPhases           = @()
    iterations            = @()
    steps                 = @()
    error                 = $null
}
|
||||
|
||||
# Appends one timestamped step record (name, status, message, data) to $script:summary.steps.
function Add-Step {
    param([string]$Name, [string]$Status, [string]$Message, [hashtable]$Data = @{})

    $entry = [ordered]@{
        name      = $Name
        status    = $Status
        message   = $Message
        data      = $Data
        timestamp = (Get-Date).ToString("o")
    }
    $script:summary.steps += $entry
}
|
||||
|
||||
# True when $Name is an OpenClaw-owned distro name: either equal to "OpenClawGateway"
# (PowerShell -eq, so case-insensitive) or starting with that exact, case-sensitive prefix.
function Test-IsOpenClawOwnedDistroName {
    param([string]$Name)

    if ($Name -eq "OpenClawGateway") {
        return $true
    }
    return $Name.StartsWith("OpenClawGateway", [System.StringComparison]::Ordinal)
}
|
||||
|
||||
# Guards the destructive scenarios (FreshMachine, Recreate): they require
# -ConfirmDestructiveClean and a distro name accepted by Test-IsOpenClawOwnedDistroName.
# Throws when either condition is not met; other scenarios pass straight through.
function Assert-DestructiveSafety {
    if ($Scenario -notin @("FreshMachine", "Recreate")) {
        return
    }

    if (-not $ConfirmDestructiveClean) {
        throw "-ConfirmDestructiveClean is required when -Scenario is $Scenario (will unregister WSL distro '$DistroName')."
    }

    if (-not (Test-IsOpenClawOwnedDistroName -Name $DistroName)) {
        throw "Refusing destructive action for non-OpenClaw distro '$DistroName'. Distro name must start with 'OpenClawGateway'."
    }
}
|
||||
|
||||
# Returns a form of $Uri that is safe to put into logs and summaries.
#
# The query string, the fragment and any "user:password@" userinfo are removed, since
# each of them can carry tokens or credentials. Input that cannot be parsed as a URI
# yields the literal "<invalid-uri>".
function Get-SafeUriDisplay {
    param([string]$Uri)

    try {
        $b = [System.UriBuilder]::new($Uri)
        $b.Query = $null
        $b.Fragment = $null
        # Credentials embedded in the authority would otherwise survive into AbsoluteUri.
        $b.UserName = [string]::Empty
        $b.Password = [string]::Empty
        return $b.Uri.AbsoluteUri
    } catch {
        return "<invalid-uri>"
    }
}
|
||||
|
||||
# Persists the run summary twice: as JSON ($summaryPath) and as a short Markdown report
# ($summaryMarkdownPath). Stamps finishedAt on $script:summary before writing.
function Write-Summary {
    New-Item -ItemType Directory -Force -Path $runRoot | Out-Null
    $script:summary.finishedAt = (Get-Date).ToString("o")
    $script:summary | ConvertTo-Json -Depth 20 | Set-Content -LiteralPath $summaryPath -Encoding UTF8

    # Header section of the Markdown report.
    $report = @(
        "# OpenClaw WSL gateway validation",
        "",
        "- Scenario: $Scenario",
        "- Status: $($script:summary.status)",
        "- Validation: $($script:summary.validationStatus)",
        "- Cleanup: $($script:summary.cleanupStatus)",
        "- Networking mode: LocalhostOnly (loopback only)",
        "- Started: $($script:summary.startedAt)",
        "- Finished: $($script:summary.finishedAt)",
        "- Output: $runRoot",
        "",
        "## Steps"
    )

    # One bullet per recorded step.
    $stepLines = @(
        foreach ($entry in $script:summary.steps) {
            "- $($entry.status): $($entry.name) - $($entry.message)"
        }
    )
    $report = $report + $stepLines

    # Error section, only when the run recorded an error.
    if ($script:summary.error) {
        $report += @(
            "",
            "## Error",
            $script:summary.error,
            "",
            "Diagnostics: see https://aka.ms/wsllogs for WSL networking/lifecycle logs."
        )
    }

    $report | Set-Content -LiteralPath $summaryMarkdownPath -Encoding UTF8
}
|
||||
|
||||
# Replaces secret values in $Content with "<redacted>".
#
# Two passes: JSON string properties with well-known secret names (tokens, setup codes,
# key material), then loose "xxx_token = value" / "xxx-token: value" pairs.
# Null or empty input is returned unchanged.
function Redact-SensitiveGatewayOutput {
    param([string]$Content)

    if ([string]::IsNullOrEmpty($Content)) {
        return $Content
    }

    $jsonSecretPattern = '("(?:bootstrapToken|bootstrap_token|deviceToken|device_token|token|setupCode|setup_code|PrivateKeyBase64|PublicKeyBase64)"\s*:\s*")[^"]+(")'
    $looseTokenPattern = '(?i)((?:bootstrap|device|gateway|auth)[_-]?token\s*[:=]\s*)[^\s,"''}]+'

    $afterJsonPass = $Content -replace $jsonSecretPattern, '$1<redacted>$2'
    return ($afterJsonPass -replace $looseTokenPattern, '$1<redacted>')
}
|
||||
|
||||
# Reads the whole file as one string, retrying when the read fails with an IOException.
# Makes up to $Attempts tries, sleeping $DelayMilliseconds between them; the exception
# from the last try is rethrown. Other error types propagate immediately.
function Read-TextFileWithRetry {
    param([string]$Path, [int]$Attempts = 10, [int]$DelayMilliseconds = 200)

    $attempt = 0
    while ($attempt -lt $Attempts) {
        $attempt++
        try {
            return Get-Content -LiteralPath $Path -Raw -ErrorAction Stop
        }
        catch [System.IO.IOException] {
            if ($attempt -ge $Attempts) {
                throw
            }
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
}
|
||||
|
||||
# Writes $Content to $Path as UTF-8, retrying when the write fails with an IOException.
# Makes up to $Attempts tries, sleeping $DelayMilliseconds between them; the exception
# from the last try is rethrown. Other error types propagate immediately.
function Write-TextFileWithRetry {
    param([string]$Path, [string]$Content, [int]$Attempts = 10, [int]$DelayMilliseconds = 200)

    $attempt = 0
    while ($attempt -lt $Attempts) {
        $attempt++
        try {
            $Content | Set-Content -LiteralPath $Path -Encoding UTF8 -ErrorAction Stop
            return
        }
        catch [System.IO.IOException] {
            if ($attempt -ge $Attempts) {
                throw
            }
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
}
|
||||
|
||||
# Copies a text file to $DestinationPath with secrets redacted on the way.
# Returns $true when the source existed and was copied, $false when there was no source.
function Copy-RedactedFileIfExists {
    param([string]$SourcePath, [string]$DestinationPath)

    if (Test-Path -LiteralPath $SourcePath) {
        $raw = Read-TextFileWithRetry -Path $SourcePath
        $redacted = Redact-SensitiveGatewayOutput $raw
        Write-TextFileWithRetry -Path $DestinationPath -Content $redacted
        return $true
    }
    return $false
}
|
||||
|
||||
# Runs an external command, captures stdout/stderr into files under $commandsRoot and
# records the outcome as a step.
#
#   Name             - step name; also used (sanitised) for the capture file names.
#   FilePath         - command to run.
#   ArgumentList     - arguments, splatted onto the command.
#   WorkingDirectory - directory to run in (defaults to the repository root).
#   Environment      - process environment overrides, restored afterwards.
#   IgnoreExitCode   - do not throw on a non-zero exit code.
#   SensitiveOutput  - rewrite both capture files through Redact-SensitiveGatewayOutput.
#
# Throws when the exit code is non-zero and -IgnoreExitCode is not set.
function Invoke-LoggedProcess {
    param(
        [string]$Name,
        [string]$FilePath,
        [string[]]$ArgumentList,
        [string]$WorkingDirectory = $repoRoot.Path,
        [hashtable]$Environment = @{},
        [switch]$IgnoreExitCode,
        [switch]$SensitiveOutput
    )

    New-Item -ItemType Directory -Force -Path $commandsRoot | Out-Null
    $safe = $Name -replace "[^a-zA-Z0-9_.-]", "-"
    $stdout = Join-Path $commandsRoot "$safe.stdout.txt"
    $stderr = Join-Path $commandsRoot "$safe.stderr.txt"

    $saved = @{}
    $exitCode = 0
    $locationPushed = $false
    $callerErrorPreference = $ErrorActionPreference
    try {
        # Everything that mutates process state happens inside the try so the finally
        # block can undo exactly what was applied, even on a partial failure.
        foreach ($k in $Environment.Keys) {
            $saved[$k] = [Environment]::GetEnvironmentVariable($k, "Process")
            [Environment]::SetEnvironmentVariable($k, [string]$Environment[$k], "Process")
        }
        Push-Location $WorkingDirectory
        $locationPushed = $true

        # Start from a known value so a stale exit code from an earlier command is never reported.
        $global:LASTEXITCODE = 0

        # With "Stop" in effect, Windows PowerShell (and PowerShell before 7.2) turns the
        # first stderr line of a native command redirected with 2> into a terminating
        # error, aborting commands that merely print warnings. Relax the preference for
        # the invocation only; the exit code remains the success signal.
        $ErrorActionPreference = "Continue"
        & $FilePath @ArgumentList > $stdout 2> $stderr
        $exitCode = [int]$global:LASTEXITCODE
    } catch {
        # Genuine failures (for example: command not found) must still stop the run.
        $ErrorActionPreference = $callerErrorPreference
        throw
    } finally {
        $ErrorActionPreference = $callerErrorPreference
        if ($locationPushed) {
            Pop-Location
        }
        foreach ($k in @($saved.Keys)) {
            [Environment]::SetEnvironmentVariable($k, $saved[$k], "Process")
        }
    }

    if ($SensitiveOutput) {
        foreach ($p in @($stdout, $stderr)) {
            if (Test-Path -LiteralPath $p) {
                $c = Read-TextFileWithRetry -Path $p -Attempts 20 -DelayMilliseconds 250
                Write-TextFileWithRetry -Path $p -Content (Redact-SensitiveGatewayOutput $c) -Attempts 20 -DelayMilliseconds 250
            }
        }
    }

    Add-Step $Name "Completed" "Command completed with exit code $exitCode." @{
        file = $FilePath; arguments = ($ArgumentList -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr
    }

    if ($exitCode -ne 0 -and -not $IgnoreExitCode) {
        throw "$Name failed with exit code $exitCode. See $stdout and $stderr."
    }
}
|
||||
|
||||
# Runs a PowerShell script in a child host through Invoke-LoggedProcess.
# Uses pwsh.exe from $PSHOME when it exists there, otherwise powershell.exe.
function Invoke-LoggedPowerShellScript {
    param([string]$Name, [string]$ScriptPath, [string[]]$ArgumentList = @())

    $hostExe = "powershell.exe"
    if ($PSHOME) {
        $pwshCandidate = Join-Path $PSHOME "pwsh.exe"
        if (Test-Path $pwshCandidate) {
            $hostExe = $pwshCandidate
        }
    }

    $hostArguments = @("-NoProfile", "-ExecutionPolicy", "Bypass", "-File", $ScriptPath) + $ArgumentList
    Invoke-LoggedProcess -Name $Name -FilePath $hostExe -ArgumentList $hostArguments
}
|
||||
|
||||
# Builds the repository and runs the shared and tray test projects, unless -NoBuild was
# given, in which case a "Skipped" step is recorded instead.
function Invoke-RepositoryValidation {
    if ($NoBuild) {
        Add-Step "repository-validation" "Skipped" "Skipped build and tests because -NoBuild was set."
        return
    }

    Invoke-LoggedPowerShellScript -Name "build" -ScriptPath (Join-Path $repoRoot "build.ps1")

    $sharedTestArgs = @("test", ".\tests\OpenClaw.Shared.Tests\OpenClaw.Shared.Tests.csproj", "--no-restore")
    Invoke-LoggedProcess -Name "test-shared" -FilePath "dotnet" -ArgumentList $sharedTestArgs

    $trayTestArgs = @("test", ".\tests\OpenClaw.Tray.Tests\OpenClaw.Tray.Tests.csproj", "--no-restore")
    Invoke-LoggedProcess -Name "test-tray" -FilePath "dotnet" -ArgumentList $trayTestArgs
}
|
||||
|
||||
# Safe, non-destructive checks: capture dotnet and WSL host information, verify that the
# tray and CLI project files exist, then run the relay probe.
function Invoke-Preflight {
    # Informational captures; a failing exit code must not abort the preflight.
    Invoke-LoggedProcess -Name "dotnet-info" -FilePath "dotnet" -ArgumentList @("--info") -IgnoreExitCode
    Invoke-LoggedProcess -Name "wsl-status" -FilePath "wsl.exe" -ArgumentList @("--status") -IgnoreExitCode
    Invoke-LoggedProcess -Name "wsl-list-before" -FilePath "wsl.exe" -ArgumentList @("--list", "--verbose") -IgnoreExitCode

    if (-not (Test-Path -LiteralPath $trayProject)) {
        throw "Tray project not found: $trayProject"
    }
    if (-not (Test-Path -LiteralPath $cliProject)) {
        throw "CLI project not found: $cliProject"
    }
    Add-Step "repo-layout" "Passed" "Required projects are present."

    Invoke-RelayPrototypeProbe
}
|
||||
|
||||
# Probes the relay endpoint given by -RelayProbeUri or, failing that, the
# OPENCLAW_RELAY_PROBE_URI environment variable.
#
# No endpoint: throws when -RequireRelayProbe is set, otherwise records "NotAvailable".
# With an endpoint: saves the response body (with token= values redacted) and records
# "Passed"; any failure along the way is rethrown as "RelayProbeFailed: ...".
function Invoke-RelayPrototypeProbe {
    $probeUri = $RelayProbeUri
    if ([string]::IsNullOrWhiteSpace($probeUri)) {
        $probeUri = [Environment]::GetEnvironmentVariable("OPENCLAW_RELAY_PROBE_URI", "Process")
    }

    if ([string]::IsNullOrWhiteSpace($probeUri)) {
        $msg = "No relay probe endpoint was supplied. Set -RelayProbeUri or OPENCLAW_RELAY_PROBE_URI."
        if ($RequireRelayProbe) {
            throw "RelayProbeMissing: $msg"
        }
        Add-Step "relay-prototype-probe" "NotAvailable" $msg
        return
    }

    $relayPath = Join-Path $commandsRoot "relay-prototype-probe.txt"
    New-Item -ItemType Directory -Force -Path $commandsRoot | Out-Null

    try {
        $response = Invoke-WebRequest -Uri $probeUri -TimeoutSec 15 -UseBasicParsing

        $body = $response.Content
        if ($null -eq $body) {
            $body = ""
        }
        $body = $body -replace '(?i)(token=)[^&\s]+', '$1<redacted>'
        $body | Set-Content -LiteralPath $relayPath -Encoding UTF8

        $stepData = @{
            uri        = (Get-SafeUriDisplay $probeUri)
            statusCode = [int]$response.StatusCode
            path       = $relayPath
        }
        Add-Step "relay-prototype-probe" "Passed" "Relay probe endpoint responded." $stepData
    } catch {
        throw "RelayProbeFailed: relay probe failed for $(Get-SafeUriDisplay $probeUri): $($_.Exception.Message)"
    }
}
|
||||
|
||||
# Returns the full path of the most recently written *.png anywhere under
# $screenshotsRoot, or $null when the folder is missing or holds no screenshots.
function Get-LatestScreenshotPath {
    if (Test-Path -LiteralPath $screenshotsRoot) {
        $newest = Get-ChildItem -LiteralPath $screenshotsRoot -Filter "*.png" -File -Recurse |
            Sort-Object -Property LastWriteTime -Descending |
            Select-Object -First 1
        if ($null -ne $newest) {
            return $newest.FullName
        }
    }
    return $null
}
|
||||
|
||||
# Saves redacted copies of the setup state, settings and device key files (whichever
# exist) into "<run root>\diagnostics" and records a "diagnostics-snapshot" step that
# points at the folder and the latest screenshot.
function Save-DiagnosticsSnapshot {
    param([string]$Reason)

    $diag = Join-Path $runRoot "diagnostics"
    New-Item -ItemType Directory -Force -Path $diag | Out-Null

    $identityPath = Join-Path $validationAppDataRoot "OpenClawTray\device-key-ed25519.json"

    # Source file -> redacted file name inside the diagnostics folder.
    $captures = @(
        @{ Source = $setupStatePath; Target = "setup-state.redacted.json" },
        @{ Source = $settingsPath; Target = "settings.redacted.json" },
        @{ Source = $identityPath; Target = "device-key.shape.redacted.json" }
    )
    foreach ($capture in $captures) {
        # Copy-RedactedFileIfExists skips sources that do not exist.
        Copy-RedactedFileIfExists -SourcePath $capture.Source -DestinationPath (Join-Path $diag $capture.Target) | Out-Null
    }

    $stepData = @{
        path             = $diag
        latestScreenshot = (Get-LatestScreenshotPath)
        wslLogsHelp      = "https://aka.ms/wsllogs"
    }
    Add-Step "diagnostics-snapshot" "Completed" "Saved diagnostics snapshot for $Reason. See https://aka.ms/wsllogs for WSL networking/lifecycle logs." $stepData
}
|
||||
|
||||
# Returns the environment variable overrides that point the tray app at the validation
# data roots ($validationAppDataRoot / $validationLocalAppDataRoot).
function Get-ValidationAppEnvironment {
    $overrides = @{}
    $overrides["OPENCLAW_TRAY_DATA_DIR"] = $validationAppDataRoot
    $overrides["OPENCLAW_TRAY_APPDATA_DIR"] = $validationAppDataRoot
    $overrides["OPENCLAW_TRAY_LOCALAPPDATA_DIR"] = $validationLocalAppDataRoot
    return $overrides
}
|
||||
|
||||
# Normalises a setup status value to its name.
#
# A value made only of digits is treated as an index into the status name table below;
# anything else, including an index outside the table or a digit string too large for
# Int32, is returned unchanged as a string.
function Convert-SetupStatus {
    param([object]$Status)

    $v = [string]$Status
    if ($v -match '^\d+$') {
        # Aligned with LocalGatewaySetupStatus enum
        $names = @("Pending", "Running", "RequiresAdmin", "RequiresRestart", "Blocked",
                   "FailedRetryable", "FailedTerminal", "Complete", "Cancelled")
        # TryParse instead of a cast: an over-long digit string must fall through to the
        # raw value rather than raise a conversion error.
        $i = 0
        if ([int]::TryParse($v, [ref]$i) -and $i -ge 0 -and $i -lt $names.Count) {
            return $names[$i]
        }
    }
    return $v
}
|
||||
|
||||
# Normalises a setup phase value to its name.
#
# A value made only of digits is treated as an index into the phase name table below;
# anything else, including an index outside the table or a digit string too large for
# Int32, is returned unchanged as a string.
function Convert-SetupPhase {
    param([object]$Phase)

    $v = [string]$Phase
    if ($v -match '^\d+$') {
        # Aligned with the clean LocalGatewaySetupPhase enum (worker / rootfs phases removed).
        $names = @(
            "NotStarted", "Preflight", "ElevationCheck",
            "EnsureWslEnabled", "CreateWslInstance", "ConfigureWslInstance",
            "InstallOpenClawCli", "PrepareGatewayConfig", "InstallGatewayService",
            "StartGateway", "WaitForGateway",
            "MintBootstrapToken", "PairOperator",
            "CheckWindowsNodeReadiness", "PairWindowsTrayNode",
            "VerifyEndToEnd", "Complete", "Failed", "Cancelled"
        )
        # TryParse instead of a cast: an over-long digit string must fall through to the
        # raw value rather than raise a conversion error.
        $i = 0
        if ([int]::TryParse($v, [ref]$i) -and $i -ge 0 -and $i -lt $names.Count) {
            return $names[$i]
        }
    }
    return $v
}
|
||||
|
||||
# Polls the UI Automation tree, starting at the desktop root, every 500 ms for an
# element with the given AutomationId. Returns the element, or $null once
# $TimeoutSeconds have elapsed without a match.
function Wait-ForUiAutomationElement {
    param([string]$AutomationId, [int]$TimeoutSeconds)

    Add-Type -AssemblyName UIAutomationClient
    Add-Type -AssemblyName UIAutomationTypes

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    $idProperty = [System.Windows.Automation.AutomationElement]::AutomationIdProperty
    $byAutomationId = New-Object System.Windows.Automation.PropertyCondition($idProperty, $AutomationId)
    $searchScope = [System.Windows.Automation.TreeScope]::Descendants

    while ($true) {
        if ((Get-Date) -ge $deadline) {
            return $null
        }

        $found = [System.Windows.Automation.AutomationElement]::RootElement.FindFirst($searchScope, $byAutomationId)
        if ($null -ne $found) {
            return $found
        }

        Start-Sleep -Milliseconds 500
    }
}
|
||||
|
||||
# Waits for the element with the given AutomationId and activates it through its
# InvokePattern, recording a "ui-click-<id>" step. When the element never appears, a
# diagnostics snapshot is saved and an error is thrown.
function Invoke-UiAutomationClick {
    param([string]$AutomationId, [int]$TimeoutSeconds)

    $target = Wait-ForUiAutomationElement -AutomationId $AutomationId -TimeoutSeconds $TimeoutSeconds
    if ($null -eq $target) {
        Save-DiagnosticsSnapshot -Reason "missing-ui-target-$AutomationId"
        throw "UI element with AutomationId '$AutomationId' was not found within $TimeoutSeconds seconds."
    }

    $invokePattern = $target.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
    $invokePattern.Invoke()
    Add-Step "ui-click-$AutomationId" "Completed" "Clicked UI element with AutomationId '$AutomationId'."
}
|
||||
|
||||
# Force-stops OpenClaw.Tray.WinUI processes whose executable lives inside this
# repository, recording one step per process. Processes that exit before they can be
# stopped are recorded as "Skipped".
#
#   Reason - free-text label stored in each step's data.
function Stop-ExistingTrayProcesses {
    param([string]$Reason)

    # End the prefix with a directory separator so that only paths *inside* the
    # repository match; without it a sibling checkout such as "<repo>-other" would
    # share the prefix and have its tray process killed as well.
    $repoPrefix = ([string]$repoRoot.Path).TrimEnd([char[]]@('\', '/')) + [System.IO.Path]::DirectorySeparatorChar

    $procs = @(Get-Process -Name "OpenClaw.Tray.WinUI" -ErrorAction SilentlyContinue |
        Where-Object {
            # Reading .Path can fail for processes we are not allowed to inspect; skip those.
            try { -not [string]::IsNullOrWhiteSpace($_.Path) -and $_.Path.StartsWith($repoPrefix, [System.StringComparison]::OrdinalIgnoreCase) }
            catch { $false }
        })

    foreach ($p in $procs) {
        $procId = $p.Id
        try {
            Stop-Process -Id $procId -Force -ErrorAction Stop
            Add-Step "stop-existing-tray" "Completed" "Stopped existing repo tray process by PID before validation." @{ pid = $procId; reason = $Reason }
        } catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
            Add-Step "stop-existing-tray" "Skipped" "Repo tray process had already exited before cleanup." @{ pid = $procId; reason = $Reason }
        }
    }
}
|
||||
|
||||
# Force-stops wsl.exe keepalive processes for the validated distro: those whose command
# line mentions $DistroName, "sleep" and "2147483647" (case-insensitive). One step is
# recorded per process; processes that already exited are recorded as "Skipped".
function Stop-WslKeepAliveProcesses {
    $target = $DistroName

    # IndexOf(string, StringComparison) is used instead of Contains(string, StringComparison):
    # the latter overload does not exist on .NET Framework, so it fails when this script
    # runs under Windows PowerShell 5.1.
    $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase
    $procs = @(Get-CimInstance Win32_Process -Filter "Name = 'wsl.exe'" -ErrorAction SilentlyContinue |
        Where-Object {
            $commandLine = $_.CommandLine
            $commandLine -and
            ($commandLine.IndexOf($target, $ignoreCase) -ge 0) -and
            ($commandLine.IndexOf("sleep", $ignoreCase) -ge 0) -and
            ($commandLine.IndexOf("2147483647", $ignoreCase) -ge 0)
        })

    foreach ($p in $procs) {
        try {
            Stop-Process -Id $p.ProcessId -Force -ErrorAction Stop
            Add-Step "stop-wsl-keepalive" "Completed" "Stopped $target keepalive process by PID." @{ pid = $p.ProcessId; distroName = $target }
        } catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
            Add-Step "stop-wsl-keepalive" "Skipped" "$target keepalive process had already exited." @{ pid = $p.ProcessId; distroName = $target }
        }
    }
}
|
||||
|
||||
# Launches the built tray executable in forced-onboarding mode and returns its process.
#
# Stops any tray process from this repository first. The launch settings are passed as
# process-level environment variables that are set just before Start-Process and put
# back to their previous values afterwards, whether or not the launch succeeded.
# Throws when the tray executable has not been built.
function Start-TrayForLocalSetup {
    Stop-ExistingTrayProcesses -Reason "pre-launch"

    # Forked onboarding entry point is SetupWarning by default; we just force
    # onboarding mode and let the script click "Set up locally".
    $allowExistingDistro = if ($Scenario -eq "UpstreamInstall") { "1" } else { "0" }
    $launchEnvironment = @{
        OPENCLAW_SKIP_UPDATE_CHECK         = "1"
        OPENCLAW_FORCE_ONBOARDING          = "1"
        OPENCLAW_WSL_DISTRO_NAME           = $DistroName
        OPENCLAW_WSL_INSTALL_LOCATION      = $wslInstallLocation
        OPENCLAW_WSL_ALLOW_EXISTING_DISTRO = $allowExistingDistro
        OPENCLAW_TRAY_DATA_DIR             = $validationAppDataRoot
        OPENCLAW_TRAY_APPDATA_DIR          = $validationAppDataRoot
        OPENCLAW_TRAY_LOCALAPPDATA_DIR     = $validationLocalAppDataRoot
        OPENCLAW_VISUAL_TEST               = "1"
        OPENCLAW_VISUAL_TEST_DIR           = $screenshotsRoot
    }

    # Remember the current values, then apply the overrides to this process.
    $previousValues = @{}
    foreach ($variableName in $launchEnvironment.Keys) {
        $previousValues[$variableName] = [Environment]::GetEnvironmentVariable($variableName, "Process")
        [Environment]::SetEnvironmentVariable($variableName, [string]$launchEnvironment[$variableName], "Process")
    }

    try {
        New-Item -ItemType Directory -Force -Path $screenshotsRoot | Out-Null

        if (-not (Test-Path -LiteralPath $trayExe)) {
            throw "Built tray executable not found at $trayExe. Run build.ps1 first or omit -NoBuild."
        }

        $trayProcess = Start-Process -FilePath $trayExe -WorkingDirectory $repoRoot -PassThru
        $stepData = @{
            pid               = $trayProcess.Id
            screenshots       = $screenshotsRoot
            file              = $trayExe
            runtimeIdentifier = $runtimeIdentifier
        }
        Add-Step "launch-tray" "Completed" "Launched tray onboarding for WSL local setup." $stepData
        return $trayProcess
    } finally {
        foreach ($variableName in $launchEnvironment.Keys) {
            [Environment]::SetEnvironmentVariable($variableName, $previousValues[$variableName], "Process")
        }
    }
}
|
||||
|
||||
# Polls the tray's setup-state file every two seconds until setup completes, fails or
# the timeout elapses.
#
# On every poll a redacted copy of the state file is written to
# "<run root>\setup-state.json", and each phase/status change is recorded both in
# $script:summary.setupPhases and as a step.
#   - Status "Complete": adopts the GatewayUrl from the state (when present), records
#     "setup-state" / "Passed" and returns.
#   - Status FailedRetryable / FailedTerminal / Blocked / Cancelled: saves a diagnostics
#     snapshot and throws.
#   - Timeout: saves a diagnostics snapshot and throws.
function Wait-ForSetupCompletion {
    param([int]$TimeoutSeconds)

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    $lastPhase = ""
    $lastStatus = ""

    while ((Get-Date) -lt $deadline) {
        if (Test-Path -LiteralPath $setupStatePath) {
            $text = Read-TextFileWithRetry -Path $setupStatePath
            $state = $text | ConvertFrom-Json

            # The copy is a run artifact, so it goes through the same redaction as every
            # other captured file; the raw state may contain token or key material.
            $copy = Join-Path $runRoot "setup-state.json"
            (Redact-SensitiveGatewayOutput $text) | Set-Content -LiteralPath $copy -Encoding UTF8

            $phase = Convert-SetupPhase $state.Phase
            $status = Convert-SetupStatus $state.Status

            # Record transitions only, not every poll.
            if ($phase -ne $lastPhase -or $status -ne $lastStatus) {
                $lastPhase = $phase
                $lastStatus = $status
                $script:summary.setupPhases += [ordered]@{
                    phase = $phase; status = $status; message = [string]$state.UserMessage; timestamp = (Get-Date).ToString("o")
                }
                Add-Step "setup-phase-$phase" $status ([string]$state.UserMessage) @{ phase = $phase; status = $status }
            }

            if ($status -eq "Complete") {
                # Prefer the gateway URL the product reports over the one passed on the command line.
                if ($state.PSObject.Properties.Name -contains "GatewayUrl" -and -not [string]::IsNullOrWhiteSpace([string]$state.GatewayUrl)) {
                    $script:GatewayUrl = [string]$state.GatewayUrl
                    $script:summary.selectedGatewayUrl = $script:GatewayUrl
                }
                Add-Step "setup-state" "Passed" "Setup reached $status." @{
                    status = $status; phase = $phase; path = $copy
                    gatewayUrl = (Get-SafeUriDisplay $script:GatewayUrl)
                }
                return
            }

            if ($status -in @("FailedRetryable", "FailedTerminal", "Blocked", "Cancelled")) {
                Save-DiagnosticsSnapshot -Reason "setup-failed-$phase"
                throw "Setup failed with status $status, phase $phase, code $($state.FailureCode): $($state.UserMessage). Diagnostics: https://aka.ms/wsllogs."
            }
        }

        Start-Sleep -Seconds 2
    }

    Save-DiagnosticsSnapshot -Reason "setup-timeout"
    throw "Setup did not reach Complete within $TimeoutSeconds seconds. Diagnostics: https://aka.ms/wsllogs."
}
|
||||
|
||||
# Drives the tray onboarding through local setup: launches the tray, waits for the
# "Set up locally" button, clicks it and waits until setup completes.
# Returns the tray process so the caller can stop it later.
function Invoke-TrayLocalSetup {
    $trayProcess = Start-TrayForLocalSetup
    Start-Sleep -Seconds 5

    # SetupWarningPage hosts the "Set up locally" primary button.
    $setupLocalButton = Wait-ForUiAutomationElement -AutomationId "OnboardingSetupLocal" -TimeoutSeconds 60
    if ($null -eq $setupLocalButton) {
        Save-DiagnosticsSnapshot -Reason "setup-local-button-not-found"
        throw "UI automation target OnboardingSetupLocal was not found on SetupWarningPage."
    }
    Invoke-UiAutomationClick -AutomationId "OnboardingSetupLocal" -TimeoutSeconds 5

    # LocalSetupProgressPage starts the engine on appearance; just wait for state.
    Wait-ForSetupCompletion -TimeoutSeconds $TimeoutSeconds

    return $trayProcess
}
|
||||
|
||||
# Stops the tray process started for this run (when one was captured), records the
# outcome as a "stop-tray" step, then always runs the broader tray and WSL
# keep-alive cleanup helpers.
function Stop-TrayProcess {
    param([object]$Process)

    if ($null -ne $Process) {
        $trayProcessId = $Process.Id
        $stillRunning = Get-Process -Id $trayProcessId -ErrorAction SilentlyContinue
        if ($null -eq $stillRunning) {
            Add-Step "stop-tray" "Skipped" "Tray process had already exited before cleanup." @{ pid = $trayProcessId }
        } else {
            Stop-Process -Id $trayProcessId -Force
            Add-Step "stop-tray" "Completed" "Stopped tray process by PID after setup validation." @{ pid = $trayProcessId }
        }
    }

    # These run even when no process object was supplied.
    Stop-ExistingTrayProcesses -Reason "post-validation"
    Stop-WslKeepAliveProcesses
}
|
||||
|
||||
# Turns a gateway URL into the absolute URI of its "/health" endpoint.
# ws is rewritten to http and wss to https; any other scheme is left untouched.
# "/health" is appended to the existing path after trailing slashes are removed.
function Convert-GatewayUrlToHealthUri {
    param([string]$Url)

    $builder = [System.UriBuilder]::new($Url)
    switch ($builder.Scheme) {
        "ws"  { $builder.Scheme = "http"; break }
        "wss" { $builder.Scheme = "https"; break }
    }

    $basePath = $builder.Path.TrimEnd("/")
    $builder.Path = $basePath + "/health"
    return $builder.Uri.AbsoluteUri
}
|
||||
|
||||
# Writes the Windows TCP connection state for local port 18789 to a JSON file under
# $commandsRoot and records a step. If the query fails, the error message is written
# to the same file and the step is recorded as Skipped.
function Save-LoopbackNetworkDiagnostics {
    param([string]$Reason)

    # Loopback only - no WSL IP, no `hostname -I`, no lan probes.
    $reasonSlug = $Reason -replace "[^a-zA-Z0-9_.-]", "-"
    $tcpPath = Join-Path $commandsRoot "network-$reasonSlug-windows-tcp-18789.json"

    try {
        $connectionRows = foreach ($connection in (Get-NetTCPConnection -LocalPort 18789 -ErrorAction Stop)) {
            [ordered]@{
                localAddress  = $connection.LocalAddress
                localPort     = $connection.LocalPort
                state         = $connection.State.ToString()
                owningProcess = $connection.OwningProcess
            }
        }
        @($connectionRows) | ConvertTo-Json -Depth 5 | Set-Content -LiteralPath $tcpPath -Encoding UTF8
        Add-Step "network-$reasonSlug-windows-tcp" "Completed" "Captured Windows TCP listener state for loopback gateway port." @{ path = $tcpPath }
    } catch {
        # Keep the failure text next to the other command captures.
        $_.Exception.Message | Set-Content -LiteralPath $tcpPath -Encoding UTF8
        Add-Step "network-$reasonSlug-windows-tcp" "Skipped" "Could not capture Windows TCP listener state. See https://aka.ms/wsllogs." @{ path = $tcpPath }
    }
}
|
||||
|
||||
# Copies the tray settings file into the run folder with token values replaced by
# "<redacted>". Records a Skipped step and returns early when the settings file
# does not exist.
function Save-RedactedSettings {
    if (-not (Test-Path -LiteralPath $settingsPath)) {
        Add-Step "settings-redacted" "Skipped" "Tray settings file was not found."
        return
    }

    $copy = Join-Path $runRoot "settings.redacted.json"
    $c = Read-TextFileWithRetry -Path $settingsPath

    # The value part accepts JSON escape sequences (for example \" or \\). A plain
    # [^"]* would stop at an escaped quote and leave the tail of the secret in the
    # "redacted" copy.
    $tokenValuePattern = '("(?:Token|token|GatewayToken|BootstrapToken|bootstrapToken|bootstrap_token|NodeToken|nodeToken)"\s*:\s*")(?:[^"\\]|\\.)*(")'
    $c = $c -replace $tokenValuePattern, '$1<redacted>$2'

    $c | Set-Content -LiteralPath $copy -Encoding UTF8
    Add-Step "settings-redacted" "Completed" "Saved redacted tray settings." @{ path = $copy }
}
|
||||
|
||||
# Returns $true when the setup state file shows the given phase: either a History
# entry for that phase with status Running or Complete, or (when no entry matches)
# the state's current Phase equals it. Returns $false when the state file is
# missing or has no History property.
function Test-SetupHistoryPhase {
    param([string]$Phase)

    if (-not (Test-Path -LiteralPath $setupStatePath)) { return $false }

    $state = Read-TextFileWithRetry -Path $setupStatePath | ConvertFrom-Json
    if ($state.PSObject.Properties.Name -notcontains "History") { return $false }

    $acceptedStatuses = @("Running", "Complete")
    foreach ($entry in @($state.History)) {
        if ((Convert-SetupPhase $entry.Phase) -eq $Phase) {
            # The status is only normalized for entries whose phase matches.
            if ($acceptedStatuses -contains (Convert-SetupStatus $entry.Status)) {
                return $true
            }
        }
    }

    # No qualifying history entry: fall back to the phase the state is currently in.
    return (Convert-SetupPhase $state.Phase) -eq $Phase
}
|
||||
|
||||
# Saves a redacted copy of the stored device identity file and reports whether it
# carries a non-blank DeviceToken or OperatorDeviceToken. Returns $true/$false and
# records a "device-identity" step in every outcome.
function Save-RedactedDeviceIdentityShape {
    $idp = Join-Path $validationAppDataRoot "OpenClawTray\device-key-ed25519.json"
    if (-not (Test-Path -LiteralPath $idp)) {
        Add-Step "device-identity" "Failed" "Device identity file was not found." @{ path = $idp }
        return $false
    }

    $copy = Join-Path $runRoot "device-key.shape.redacted.json"
    Copy-RedactedFileIfExists -SourcePath $idp -DestinationPath $copy | Out-Null

    try {
        $identity = Get-Content -LiteralPath $idp -Raw | ConvertFrom-Json

        # Either property counts, as long as it is present and not blank.
        $hasOperatorToken = $false
        foreach ($tokenProperty in @("DeviceToken", "OperatorDeviceToken")) {
            $isPresent = $identity.PSObject.Properties.Name -contains $tokenProperty
            if ($isPresent -and -not [string]::IsNullOrWhiteSpace([string]$identity.$tokenProperty)) {
                $hasOperatorToken = $true
                break
            }
        }

        $stepStatus = if ($hasOperatorToken) { "Passed" } else { "Failed" }
        Add-Step "device-identity" $stepStatus "Checked stored device identity token shape." @{
            path = $copy; hasOperatorToken = $hasOperatorToken
        }
        return $hasOperatorToken
    } catch {
        Add-Step "device-identity" "Failed" "Device identity JSON could not be parsed." @{ path = $copy }
        return $false
    }
}
|
||||
|
||||
# Returns $true when at least one of the named properties exists on the object and
# its value, converted to a string, is not null/empty/whitespace.
function Test-JsonStringProperty {
    param([object]$Json, [string[]]$Names)

    foreach ($candidate in $Names) {
        $isDeclared = $Json.PSObject.Properties.Name -contains $candidate
        if (-not $isDeclared) { continue }

        $text = [string]$Json.$candidate
        if (-not [string]::IsNullOrWhiteSpace($text)) { return $true }
    }
    return $false
}
|
||||
|
||||
# Returns the named property of the object as a string, or "" when the object is
# falsy or does not have that property.
function Get-JsonStringProperty {
    param([object]$Json, [string]$Name)

    if (-not $Json) { return "" }
    if ($Json.PSObject.Properties.Name -notcontains $Name) { return "" }
    return [string]$Json.$Name
}
|
||||
|
||||
# Runs `openclaw qr --json` inside the WSL distro and classifies the bootstrap shape
# it reports (setup code, direct token, or unknown). The stdout capture is rewritten
# in redacted form, the result is stored in $script:summary.pairingValidation, and a
# "wsl-bootstrap-token" step is recorded.
# Throws when the command exits non-zero, when its output is not valid JSON, or when
# -RequireRealGatewayBootstrap is set and no setup code was returned.
function Invoke-BootstrapHandoffProbe {
    # Real upstream setup-code / bootstrap proof.
    $stdout = Join-Path $commandsRoot "wsl-bootstrap-token.stdout.txt"
    $stderr = Join-Path $commandsRoot "wsl-bootstrap-token.stderr.txt"

    # Do not name this $args: that is a PowerShell automatic variable and must not
    # be assigned to.
    $wslArguments = @("-d", $DistroName, "--", "/opt/openclaw/bin/openclaw", "qr", "--json", "--url", $GatewayUrl)
    & wsl.exe @wslArguments > $stdout 2> $stderr
    $exitCode = if ($null -eq $global:LASTEXITCODE) { 0 } else { $global:LASTEXITCODE }

    # Keep the raw output in memory for parsing; only the redacted form stays on disk.
    $raw = if (Test-Path -LiteralPath $stdout) { Read-TextFileWithRetry -Path $stdout -Attempts 20 -DelayMilliseconds 250 } else { "" }
    Write-TextFileWithRetry -Path $stdout -Content (Redact-SensitiveGatewayOutput $raw) -Attempts 20 -DelayMilliseconds 250

    if ($exitCode -ne 0) {
        Add-Step "wsl-bootstrap-token" "Failed" "Gateway QR command failed with exit code $exitCode." @{
            arguments = ($wslArguments -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr
        }
        throw "BootstrapTokenCommandFailed: openclaw qr --json failed. See $stdout and $stderr."
    }

    $hasSetupCode = $false; $hasDirectToken = $false
    try {
        $qr = $raw | ConvertFrom-Json
        $hasSetupCode = Test-JsonStringProperty $qr @("setupCode", "setup_code")
        $hasDirectToken = Test-JsonStringProperty $qr @("bootstrapToken", "bootstrap_token", "token")
    } catch {
        # Record the failure as a step, like the exit-code failure path above.
        $parseError = $_.Exception.Message
        Add-Step "wsl-bootstrap-token" "Failed" "Gateway QR command did not produce valid JSON." @{
            arguments = ($wslArguments -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr
        }
        throw "BootstrapTokenJsonInvalid: openclaw qr --json did not produce valid JSON: $parseError"
    }

    # A setup code takes precedence over a direct token when both are present.
    $shape = if ($hasSetupCode) { "UpstreamSetupCode" } elseif ($hasDirectToken) { "DirectBootstrapToken" } else { "Unknown" }
    $script:summary.pairingValidation["bootstrapQrShape"] = $shape
    $script:summary.pairingValidation["realUpstreamBootstrapHandoff"] = $hasSetupCode

    Add-Step "wsl-bootstrap-token" "Completed" "Gateway QR command completed; bootstrap shape is $shape." @{
        arguments = ($wslArguments -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr; bootstrapQrShape = $shape; realUpstreamBootstrapHandoff = $hasSetupCode
    }

    if ($RequireRealGatewayBootstrap -and -not $hasSetupCode) {
        throw "RealGatewayBootstrapRequired: expected upstream setupCode bootstrap handoff, but openclaw qr --json returned $shape."
    }
}
|
||||
|
||||
# When -RequireOperatorPairing is set, proves operator pairing in three steps: the
# setup state recorded the PairOperator phase, a stored device token exists, and the
# CLI can reconnect with --require-stored-device-token. Throws
# "OperatorPairingProofFailed: ..." when one of the first two checks fails.
function Invoke-OperatorPairingProof {
    if (-not $RequireOperatorPairing) {
        Add-Step "operator-pairing-proof" "Skipped" "Operator pairing proof was not required."
        return
    }

    $pairPhaseRecorded = Test-SetupHistoryPhase -Phase "PairOperator"
    if (-not $pairPhaseRecorded) {
        Save-DiagnosticsSnapshot -Reason "operator-pair-phase-missing"
        throw "OperatorPairingProofFailed: setup state did not record PairOperator."
    }

    $storedTokenPresent = Save-RedactedDeviceIdentityShape
    if (-not $storedTokenPresent) {
        Save-DiagnosticsSnapshot -Reason "operator-device-token-missing"
        throw "OperatorPairingProofFailed: stored operator device token is missing."
    }

    $reconnectArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat", "--require-stored-device-token",
        "--connect-timeout-ms", "15000"
    )
    $reconnectEnvironment = Get-ValidationAppEnvironment
    Invoke-LoggedProcess "operator-stored-token-reconnect" "dotnet" $reconnectArguments -Environment $reconnectEnvironment -SensitiveOutput

    $script:summary.pairingValidation["operatorPaired"] = $true
    Add-Step "operator-pairing-proof" "Passed" "Stored operator device token reconnect succeeded."
}
|
||||
|
||||
# When -RequireWindowsNodePairing is set, checks that the setup state recorded the
# PairWindowsTrayNode phase and then runs the CLI with --require-node. Throws
# "WindowsNodePairingProofFailed: ..." when the phase was not recorded.
function Invoke-WindowsNodePairingProof {
    # The Windows tray itself acts as the node, so the proof is the recorded
    # PairWindowsTrayNode phase plus a gateway node.list that returns the tray node.
    if (-not $RequireWindowsNodePairing) {
        Add-Step "windows-node-pairing-proof" "Skipped" "Windows tray node pairing proof was not required."
        return
    }

    $nodePhaseRecorded = Test-SetupHistoryPhase -Phase "PairWindowsTrayNode"
    if (-not $nodePhaseRecorded) {
        Save-DiagnosticsSnapshot -Reason "windows-node-pair-phase-missing"
        throw "WindowsNodePairingProofFailed: setup state did not record PairWindowsTrayNode."
    }

    $nodeListArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat", "--require-stored-device-token", "--require-node",
        "--connect-timeout-ms", "90000"
    )
    $nodeListEnvironment = Get-ValidationAppEnvironment
    Invoke-LoggedProcess "windows-node-list-proof" "dotnet" $nodeListArguments -Environment $nodeListEnvironment -SensitiveOutput

    $script:summary.pairingValidation["windowsNodePaired"] = $true
    Add-Step "windows-node-pairing-proof" "Passed" "Gateway node.list returned the Windows tray node."
}
|
||||
|
||||
# Post-install smoke checks, in order: WSL distro listing, loopback TCP diagnostics,
# openclaw version and config validation inside the distro, a gateway journal
# capture, the gateway /health probe, the bootstrap and pairing proofs, and a final
# CLI probe. Throws when the health probe fails; the helpers it calls may throw too.
function Invoke-SmokeChecks {
    Invoke-LoggedProcess "wsl-list-after" "wsl.exe" @("--list", "--verbose") -IgnoreExitCode
    Save-LoopbackNetworkDiagnostics -Reason "post-install"

    # Gateway in WSL via systemd user unit (UpstreamInstall layout).
    Invoke-LoggedProcess "wsl-openclaw-version" "wsl.exe" @(
        "-d", $DistroName, "-u", "openclaw", "--", "/opt/openclaw/bin/openclaw", "--version")
    Invoke-LoggedProcess "wsl-openclaw-config-validate" "wsl.exe" @(
        "-d", $DistroName, "-u", "openclaw", "--", "/opt/openclaw/bin/openclaw", "config", "validate")
    # NOTE(review): `journalctl --user` runs here as root while the other commands run
    # as openclaw - confirm this reads the journal of the intended user unit.
    Invoke-LoggedProcess "wsl-gateway-journal" "wsl.exe" @(
        "-d", $DistroName, "-u", "root", "--", "journalctl", "--user", "-u", "openclaw-gateway",
        "--no-pager", "-n", "200") -IgnoreExitCode -SensitiveOutput

    # Loopback-only health probe.
    $healthUri = Convert-GatewayUrlToHealthUri -Url $GatewayUrl
    $healthPath = Join-Path $commandsRoot "gateway-health.json"
    try {
        $h = Invoke-RestMethod -Uri $healthUri -TimeoutSec 10
        $h | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $healthPath -Encoding UTF8
        # Thrown inside the try on purpose: the catch below wraps it with the URI.
        if (-not $h.ok) { throw "Gateway health response did not contain ok=true." }

        $gw = if ($h.PSObject.Properties.Name -contains "gateway") { $h.gateway } else { $null }
        $version = Get-JsonStringProperty $gw "version"
        $displayName = Get-JsonStringProperty $gw "displayName"
        # A "-dev" version or a "Dev OpenClaw" display name is classified as DevShim.
        $isDev = $version -like "*-dev*" -or $displayName -like "Dev OpenClaw*"
        $script:summary.pairingValidation["gatewayImplementation"] = if ($isDev) { "DevShim" } else { "ProductionCandidate" }
        Add-Step "gateway-health" "Passed" "Gateway health endpoint returned ok=true." @{ uri = $healthUri; path = $healthPath }
    } catch {
        throw "Gateway health check failed for ${healthUri}: $($_.Exception.Message). Diagnostics: https://aka.ms/wsllogs."
    }

    Invoke-BootstrapHandoffProbe
    Save-RedactedSettings
    Invoke-OperatorPairingProof
    Invoke-WindowsNodePairingProof

    # Do not name this $args: that is a PowerShell automatic variable and must not
    # be assigned to.
    $cliProbeArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat",
        "--message", "openclaw validation ping",
        "--connect-timeout-ms", "15000"
    )
    if ($RequireOperatorPairing) { $cliProbeArguments += "--require-stored-device-token" }
    Invoke-LoggedProcess "openclaw-cli-probe" "dotnet" $cliProbeArguments -Environment (Get-ValidationAppEnvironment) -SensitiveOutput
}
|
||||
|
||||
# Unregisters the validation WSL distro (exit code ignored) and then removes its
# install location directory when one exists. A failed directory removal is
# recorded as a Skipped step rather than thrown.
function Invoke-DistroUnregisterIfPresent {
    param([string]$Reason)

    Stop-WslKeepAliveProcesses
    # Authoritative repair primitive: `wsl --unregister`. NEVER `wsl --shutdown`.
    Invoke-LoggedProcess "wsl-unregister-$Reason" "wsl.exe" @("--unregister", $DistroName) -IgnoreExitCode

    if (-not (Test-Path -LiteralPath $wslInstallLocation)) { return }

    $removeStepName = "remove-install-location-$Reason"
    try {
        Remove-Item -LiteralPath $wslInstallLocation -Recurse -Force -ErrorAction Stop
        Add-Step $removeStepName "Completed" "Removed install location directory." @{ path = $wslInstallLocation }
    } catch {
        Add-Step $removeStepName "Skipped" "Could not remove install location: $($_.Exception.Message)" @{ path = $wslInstallLocation }
    }
}
|
||||
|
||||
# Prepares a clean slate before an iteration. For the FreshMachine and Recreate
# scenarios it unregisters the distro and wipes the isolated AppData roots; for any
# other scenario it only stops the WSL keep-alive processes.
function Invoke-PreIterationCleanup {
    param([int]$Index)

    if ($Scenario -notin @("FreshMachine", "Recreate")) {
        Stop-WslKeepAliveProcesses
        return
    }

    Invoke-DistroUnregisterIfPresent -Reason "iteration-$Index-pre"

    # Wipe isolated AppData so identity store starts empty.
    foreach ($p in @($validationAppDataRoot, $validationLocalAppDataRoot)) {
        if (-not (Test-Path -LiteralPath $p)) { continue }
        try {
            Remove-Item -LiteralPath $p -Recurse -Force -ErrorAction Stop
        } catch {
            # Still best-effort, but a leftover directory means the iteration does not
            # start from an empty store, so leave a trace instead of swallowing it.
            Add-Step "remove-appdata-iteration-$Index-pre" "Skipped" "Could not remove isolated AppData directory: $($_.Exception.Message)" @{ path = $p }
        }
    }
}
|
||||
|
||||
# Cleans up the recreated distro after an iteration and returns this iteration's
# cleanup status: "Skipped", "Passed", or "Failed".
# Cleanup is skipped outside the Recreate scenario, when a failed distro is kept for
# inspection, or when a successful distro is deliberately left in place.
# $script:summary.cleanupStatus is the run-wide aggregate: once an iteration's
# cleanup has failed it stays "Failed", whatever later iterations do.
# Rethrows a cleanup failure unless -ContinueOnCleanupFailure is set.
function Invoke-PostIterationCleanup {
    param([int]$Index, [bool]$IterationFailed)

    $cleanupStepName = "iteration-$Index-cleanup"
    $earlierCleanupFailed = ($script:summary.cleanupStatus -eq "Failed")
    $summaryStatusWhenSkipped = if ($earlierCleanupFailed) { "Failed" } else { "Skipped" }

    if ($Scenario -ne "Recreate") {
        $script:summary.cleanupStatus = $summaryStatusWhenSkipped
        Add-Step $cleanupStepName "Skipped" "Post-iteration distro cleanup is only required in Recreate scenario."
        return "Skipped"
    }
    if ($IterationFailed -and $KeepFailedDistro) {
        $script:summary.cleanupStatus = $summaryStatusWhenSkipped
        Add-Step $cleanupStepName "Skipped" "Keeping failed WSL distro for inspection (-KeepFailedDistro)." @{ distroName = $DistroName }
        return "Skipped"
    }
    if (-not $IterationFailed -and -not $CleanupAfterSuccess) {
        $script:summary.cleanupStatus = $summaryStatusWhenSkipped
        Add-Step $cleanupStepName "Skipped" "Leaving successful distro (-CleanupAfterSuccess:`$false)." @{ distroName = $DistroName }
        return "Skipped"
    }

    try {
        $script:summary.cleanupStatus = "Running"
        Invoke-DistroUnregisterIfPresent -Reason "iteration-$Index-post"
        # Keep an earlier failure visible in the aggregate, as the skip branches do;
        # the per-iteration result returned below is still "Passed".
        $script:summary.cleanupStatus = if ($earlierCleanupFailed) { "Failed" } else { "Passed" }
        Add-Step $cleanupStepName "Passed" "Cleaned recreated WSL distro after validation iteration." @{ distroName = $DistroName }
        return "Passed"
    } catch {
        $script:summary.cleanupStatus = "Failed"
        Add-Step $cleanupStepName "Failed" $_.Exception.Message
        if (-not $ContinueOnCleanupFailure) { throw }
        return "Failed"
    }
}
|
||||
|
||||
# Builds the ordered record that tracks one validation iteration. It starts with
# validationStatus "Running" and cleanupStatus "NotStarted"; the error fields and
# finishedAt are $null until the caller fills them in.
function New-IterationRecord {
    param([int]$Index)

    $record = [ordered]@{}
    $record.Add("index", $Index)
    $record.Add("distroName", $DistroName)
    $record.Add("installLocation", $wslInstallLocation)
    $record.Add("validationStatus", "Running")
    $record.Add("cleanupStatus", "NotStarted")
    $record.Add("error", $null)
    $record.Add("cleanupError", $null)
    # Round-trip ("o") timestamp of when the record was created.
    $record.Add("startedAt", (Get-Date).ToString("o"))
    $record.Add("finishedAt", $null)
    return $record
}
|
||||
|
||||
# Runs one full validation iteration: repository validation, pre-iteration cleanup,
# tray-driven local setup, and the smoke checks. The outcome is recorded on both the
# per-iteration record and the run summary. The tray is always stopped and
# post-iteration cleanup always attempted, whether or not validation failed.
# Rethrows a validation failure, or a cleanup failure raised from the finally block.
function Invoke-ValidationIteration {
    param([int]$Index)

    $iteration = New-IterationRecord -Index $Index
    $script:summary.iterations += $iteration
    Add-Step "iteration-$Index" "Started" "Starting validation iteration $Index."
    $trayProcess = $null
    $iterationFailed = $false

    try {
        Invoke-RepositoryValidation
        Invoke-PreIterationCleanup -Index $Index
        $trayProcess = Invoke-TrayLocalSetup
        Invoke-SmokeChecks

        Add-Step "iteration-$Index" "Passed" "Validation iteration $Index passed."
        $iteration.validationStatus = "Passed"
        # The summary status covers the whole run. When the caller continues past a
        # failed iteration, a later pass must not overwrite that failure; otherwise
        # the run would be reported as Passed and exit 0.
        if ($script:summary.validationStatus -ne "Failed") {
            $script:summary.validationStatus = "Passed"
        }
    } catch {
        $iterationFailed = $true
        $iteration.validationStatus = "Failed"
        $iteration.error = $_.Exception.Message
        $script:summary.validationStatus = "Failed"
        Save-DiagnosticsSnapshot -Reason "iteration-$Index-failed"
        throw
    } finally {
        try {
            Stop-TrayProcess -Process $trayProcess
            $iteration.cleanupStatus = Invoke-PostIterationCleanup -Index $Index -IterationFailed $iterationFailed
        } catch {
            $iteration.cleanupStatus = "Failed"
            $iteration.cleanupError = $_.Exception.Message
            throw
        } finally {
            # Stamp the end time no matter how validation or cleanup ended.
            $iteration.finishedAt = (Get-Date).ToString("o")
        }
    }
}
|
||||
|
||||
# ---- Script entry point -------------------------------------------------------
# Creates the output folders, runs the selected scenario, derives the final summary
# status, writes the summary, and exits with 0 (passed) or 1 (failed).
New-Item -ItemType Directory -Force -Path $runRoot, $commandsRoot, $screenshotsRoot | Out-Null

$exitCode = 0
try {
    Assert-DestructiveSafety
    Invoke-Preflight

    if ($Scenario -eq "PreflightOnly") {
        Add-Step "scenario" "Passed" "Preflight completed."
        $script:summary.validationStatus = "Passed"
        $script:summary.cleanupStatus = "Skipped"
    } elseif ($Scenario -eq "Recreate" -or $Iterations -gt 1) {
        if ($Iterations -lt 1) { throw "-Iterations must be at least 1." }
        foreach ($i in 1..$Iterations) {
            try {
                Invoke-ValidationIteration -Index $i
            } catch {
                Add-Step "iteration-$i" "Failed" $_.Exception.Message
                # Without -ContinueOnFailure the first failing iteration ends the run.
                if (-not $ContinueOnFailure) { throw }
            }
        }
    } else {
        # UpstreamInstall or FreshMachine, single shot.
        Invoke-ValidationIteration -Index 1
    }

    # Settle statuses that nobody finalized.
    if ($script:summary.validationStatus -eq "Running") { $script:summary.validationStatus = "Passed" }
    if (@("Running", "NotStarted") -contains $script:summary.cleanupStatus) { $script:summary.cleanupStatus = "Skipped" }

    # A validation failure outranks a cleanup failure.
    if ($script:summary.validationStatus -eq "Failed") {
        $script:summary.status = "Failed"
        $exitCode = 1
    } elseif ($script:summary.cleanupStatus -eq "Failed") {
        $script:summary.status = "PassedWithCleanupFailure"
    } else {
        $script:summary.status = "Passed"
    }
} catch {
    $script:summary.status = "Failed"
    if ($script:summary.validationStatus -eq "Running") { $script:summary.validationStatus = "Failed" }
    if ($script:summary.cleanupStatus -eq "Running") { $script:summary.cleanupStatus = "Failed" }
    $script:summary.error = $_.Exception.Message
    Add-Step "validation" "Failed" $_.Exception.Message
    $exitCode = 1
} finally {
    Write-Summary
}

Write-Host "Validation summary: $summaryPath"
if ($script:summary.status -eq "Failed") {
    Write-Host "Diagnostics: see https://aka.ms/wsllogs for WSL networking/lifecycle logs."
}
exit $exitCode
|
||||
16
src/Directory.Build.props
Normal file
16
src/Directory.Build.props
Normal file
@ -0,0 +1,16 @@
|
||||
<Project>

  <!--
    Build settings shared by OpenClaw.Shared, OpenClaw.Tray.WinUI, and OpenClaw.Cli.

    OpenClaw.CommandPalette keeps its own Directory.Build.props one level deeper. MSBuild
    stops walking up once it finds that file, so nothing here applies to CommandPalette.
  -->

  <PropertyGroup>
    <!-- Check every package dependency (direct and transitive) for known CVEs during restore.
         The SDK default is "direct"; "all" gives broader security coverage. -->
    <NuGetAuditMode>all</NuGetAuditMode>
  </PropertyGroup>

</Project>
|
||||
12
src/OpenClaw.Cli/OpenClaw.Cli.csproj
Normal file
12
src/OpenClaw.Cli/OpenClaw.Cli.csproj
Normal file
@ -0,0 +1,12 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console executable built against the OpenClaw.Shared project. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net10.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <ProjectReference Include="..\OpenClaw.Shared\OpenClaw.Shared.csproj" />
  </ItemGroup>

</Project>
|
||||
300
src/OpenClaw.Cli/Program.cs
Normal file
300
src/OpenClaw.Cli/Program.cs
Normal file
@ -0,0 +1,300 @@
|
||||
using System.Globalization;
|
||||
using System.Text;
|
||||
using OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Options for one run of the CLI validator. Every property starts at its default
/// and is overwritten by the matching command-line switch when present.
/// </summary>
internal sealed class CliOptions
{
    /// <summary>Settings file to read; defaults to OpenClawTray\settings.json under the ApplicationData folder.</summary>
    public string SettingsPath { get; set; } =
        Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "OpenClawTray", "settings.json");

    /// <summary>Gateway URL from <c>--url</c>; <c>null</c> when not supplied.</summary>
    public string? GatewayUrlOverride { get; set; } = null;

    /// <summary>Token from <c>--token</c>; <c>null</c> when not supplied.</summary>
    public string? TokenOverride { get; set; } = null;

    /// <summary>Chat message text to send.</summary>
    public string Message { get; set; } = "openclaw-cli validation ping";

    /// <summary>Number of sends.</summary>
    public int Repeat { get; set; } = 1;

    /// <summary>Delay between sends, in milliseconds.</summary>
    public int DelayMs { get; set; } = 500;

    /// <summary>How long to wait for the Connected status, in milliseconds.</summary>
    public int ConnectTimeoutMs { get; set; } = 10_000;

    /// <summary>Whether to request sessions/usage/nodes once after connecting.</summary>
    public bool ProbeReadApis { get; set; } = false;

    /// <summary>Whether the shared client logs to the console.</summary>
    public bool Verbose { get; set; } = false;
}
|
||||
|
||||
/// <summary>
/// Console entry point for the OpenClaw CLI WebSocket validator. It resolves a
/// gateway URL and token (tray settings file plus command-line overrides), connects
/// through the shared gateway client, optionally requests the read APIs, and sends
/// one or more chat messages.
/// </summary>
internal static class Program
{
    /// <summary>Pause after issuing the read-API requests, presumably so their update events can arrive and be printed.</summary>
    private const int ReadProbeSettleDelayMs = 1200;

    /// <summary>Runs the validator.</summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>
    /// 0 when help was shown or all sends succeeded; 1 when the connection did not reach
    /// Connected or a send failed; 2 for argument, settings, URL or token problems.
    /// </returns>
    private static async Task<int> Main(string[] args)
    {
        if (args.Any(a => a is "--help" or "-h"))
        {
            PrintUsage();
            return 0;
        }

        CliOptions options;
        try
        {
            options = ParseArgs(args);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Argument error: {ex.Message}");
            PrintUsage();
            return 2;
        }

        string gatewayUrl;
        string token;
        SettingsData? loaded;
        try
        {
            (gatewayUrl, token, loaded) = LoadConnectionFromSettings(options);
        }
        catch (Exception ex)
        {
            // An unreadable or unparsable settings file is a usage problem; report it
            // like the other input errors instead of crashing with a stack trace.
            Console.Error.WriteLine($"Settings error: {ex.Message}");
            return 2;
        }

        if (string.IsNullOrWhiteSpace(gatewayUrl))
        {
            Console.Error.WriteLine("Gateway URL is missing. Set it in tray settings or pass --url.");
            return 2;
        }

        if (string.IsNullOrWhiteSpace(token))
        {
            Console.Error.WriteLine("Token is missing. Set it in tray settings or pass --token.");
            return 2;
        }

        Console.WriteLine($"Settings file: {options.SettingsPath}");
        if (loaded is null)
        {
            Console.WriteLine("Settings file not found; using command-line overrides only.");
        }

        Console.WriteLine($"Gateway URL: {GatewayUrlHelper.SanitizeForDisplay(gatewayUrl)}");
        Console.WriteLine($"Token source: {(options.TokenOverride is null ? "settings" : "--token override")}");
        if (loaded is not null)
        {
            Console.WriteLine($"Node mode in settings: {loaded.EnableNodeMode}");
            Console.WriteLine($"SSH tunnel in settings: {loaded.UseSshTunnel} (local port {loaded.SshTunnelLocalPort})");
        }

        IOpenClawLogger logger = options.Verbose ? new ConsoleLogger() : NullLogger.Instance;
        using var client = new OpenClawGatewayClient(gatewayUrl, token, logger);

        var lastStatus = ConnectionStatus.Disconnected;
        var connectedTcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);
        var errorTcs = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

        client.StatusChanged += (_, status) =>
        {
            lastStatus = status;
            Console.WriteLine($"Status: {status}");
            if (status == ConnectionStatus.Connected)
            {
                connectedTcs.TrySetResult(true);
            }
            else if (status == ConnectionStatus.Error)
            {
                errorTcs.TrySetResult(true);
            }
        };

        client.SessionsUpdated += (_, sessions) => Console.WriteLine($"sessions.list -> {sessions.Length} session(s)");
        client.UsageUpdated += (_, usage) => Console.WriteLine($"usage -> tokens {usage.TotalTokens}, requests {usage.RequestCount}, cost ${usage.CostUsd:F4}");
        client.NodesUpdated += (_, nodes) => Console.WriteLine($"node.list -> {nodes.Length} node(s)");

        Console.WriteLine("Connecting...");
        await client.ConnectAsync();

        var connected = await WaitForConnectedAsync(connectedTcs.Task, errorTcs.Task, options.ConnectTimeoutMs);
        if (!connected)
        {
            Console.Error.WriteLine($"Connection did not reach Connected within {options.ConnectTimeoutMs}ms (last status: {lastStatus}).");
            return 1;
        }

        Console.WriteLine($"Connected. Device ID: {client.OperatorDeviceId ?? "(unknown)"}");
        Console.WriteLine($"Granted scopes: {string.Join(", ", client.GrantedOperatorScopes)}");

        if (options.ProbeReadApis)
        {
            Console.WriteLine("Probing read APIs (sessions/usage/nodes)...");
            await client.RequestSessionsAsync();
            await client.RequestUsageAsync();
            await client.RequestNodesAsync();
            await Task.Delay(ReadProbeSettleDelayMs);
        }

        var failures = 0;
        for (var i = 1; i <= options.Repeat; i++)
        {
            // Tag each message with its attempt number only when sending more than once.
            var message = options.Repeat == 1
                ? options.Message
                : $"{options.Message} [attempt {i}/{options.Repeat}]";

            try
            {
                Console.WriteLine($"chat.send #{i} -> \"{message}\"");
                await client.SendChatMessageAsync(message);
                Console.WriteLine($"chat.send #{i} OK");
            }
            catch (Exception ex)
            {
                // A failed send is counted and the remaining sends are still attempted.
                failures++;
                Console.Error.WriteLine($"chat.send #{i} FAILED: {ex.Message}");
            }

            if (i < options.Repeat)
            {
                await Task.Delay(options.DelayMs);
            }
        }

        if (failures > 0)
        {
            Console.Error.WriteLine($"Completed with {failures} failed send(s).");
            return 1;
        }

        Console.WriteLine("All sends succeeded.");
        return 0;
    }

    /// <summary>
    /// Waits until the connection succeeds, reports an error, or the timeout elapses.
    /// </summary>
    /// <param name="connected">Task that completes when the Connected status was seen.</param>
    /// <param name="error">Task that completes when the Error status was seen.</param>
    /// <param name="timeoutMs">Maximum wait in milliseconds.</param>
    /// <returns><c>true</c> only when <paramref name="connected"/> is the task that finished the wait.</returns>
    private static async Task<bool> WaitForConnectedAsync(Task connected, Task error, int timeoutMs)
    {
        using var timeoutCts = new CancellationTokenSource();
        var timeoutTask = Task.Delay(timeoutMs, timeoutCts.Token);

        try
        {
            var completed = await Task.WhenAny(connected, error, timeoutTask);
            return completed == connected;
        }
        finally
        {
            // Stop the timer once the outcome is known so it does not stay pending.
            timeoutCts.Cancel();
        }
    }

    /// <summary>
    /// Resolves the effective gateway URL and token. Command-line overrides win; values
    /// from the settings file fill in whatever was not overridden.
    /// </summary>
    /// <param name="options">Parsed command-line options.</param>
    /// <returns>The URL and token (empty strings when unresolved) and the loaded settings, if any.</returns>
    private static (string GatewayUrl, string Token, SettingsData? Loaded) LoadConnectionFromSettings(CliOptions options)
    {
        var loaded = LoadSettings(options.SettingsPath);

        var gatewayUrl = options.GatewayUrlOverride;
        if (string.IsNullOrWhiteSpace(gatewayUrl))
        {
            gatewayUrl = BuildEffectiveGatewayUrl(loaded);
        }

        var token = options.TokenOverride;
        if (string.IsNullOrWhiteSpace(token))
        {
            token = loaded?.Token;
        }

        return (gatewayUrl ?? string.Empty, token ?? string.Empty, loaded);
    }

    /// <summary>Reads and parses the settings file.</summary>
    /// <param name="path">Path of the settings file.</param>
    /// <returns>The parsed settings, or <c>null</c> when the file does not exist.</returns>
    /// <exception cref="InvalidOperationException">The file exists but parsing yielded no settings.</exception>
    private static SettingsData? LoadSettings(string path)
    {
        if (!File.Exists(path))
        {
            // Not fatal: --url and --token can supply everything. When they do not,
            // the caller reports which value is missing.
            return null;
        }

        var json = File.ReadAllText(path, Encoding.UTF8);
        return SettingsData.FromJson(json)
            ?? throw new InvalidOperationException("Settings JSON could not be parsed");
    }

    /// <summary>
    /// Picks the gateway URL implied by the settings: the configured URL, or the local
    /// loopback endpoint of the SSH tunnel when tunnelling is enabled.
    /// </summary>
    /// <param name="settings">Loaded settings, or <c>null</c>.</param>
    /// <returns>The URL, or <c>null</c> when no settings are available.</returns>
    private static string? BuildEffectiveGatewayUrl(SettingsData? settings)
    {
        if (settings is null)
        {
            return null;
        }

        if (!settings.UseSshTunnel)
        {
            return settings.GatewayUrl;
        }

        // A non-positive tunnel port falls back to 18789.
        var port = settings.SshTunnelLocalPort <= 0 ? 18789 : settings.SshTunnelLocalPort;
        return $"ws://127.0.0.1:{port}";
    }

    /// <summary>Parses the command line into a <see cref="CliOptions"/>.</summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>The populated options.</returns>
    /// <exception cref="ArgumentException">Unknown switch, missing value, or out-of-range number.</exception>
    private static CliOptions ParseArgs(string[] args)
    {
        var options = new CliOptions();

        for (var i = 0; i < args.Length; i++)
        {
            var arg = args[i];
            switch (arg)
            {
                case "--settings":
                    options.SettingsPath = RequireValue(args, ref i, arg);
                    break;
                case "--url":
                    options.GatewayUrlOverride = RequireValue(args, ref i, arg);
                    break;
                case "--token":
                    options.TokenOverride = RequireValue(args, ref i, arg);
                    break;
                case "--message":
                    options.Message = RequireValue(args, ref i, arg);
                    break;
                case "--repeat":
                    options.Repeat = ParseInt(RequireValue(args, ref i, arg), min: 1, name: arg);
                    break;
                case "--delay-ms":
                    options.DelayMs = ParseInt(RequireValue(args, ref i, arg), min: 0, name: arg);
                    break;
                case "--connect-timeout-ms":
                    options.ConnectTimeoutMs = ParseInt(RequireValue(args, ref i, arg), min: 1000, name: arg);
                    break;
                case "--probe-read":
                    options.ProbeReadApis = true;
                    break;
                case "--verbose":
                    options.Verbose = true;
                    break;
                default:
                    throw new ArgumentException($"Unknown argument: {arg}");
            }
        }

        return options;
    }

    /// <summary>Consumes and returns the value that follows a switch.</summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <param name="index">Index of the switch; advanced to the value on success.</param>
    /// <param name="name">Switch name, used in the error message.</param>
    /// <exception cref="ArgumentException">The switch is the last argument.</exception>
    private static string RequireValue(string[] args, ref int index, string name)
    {
        if (index + 1 >= args.Length)
        {
            throw new ArgumentException($"Missing value for {name}");
        }

        index++;
        return args[index];
    }

    /// <summary>Parses an invariant-culture integer and enforces a lower bound.</summary>
    /// <param name="value">Text to parse.</param>
    /// <param name="min">Smallest accepted value.</param>
    /// <param name="name">Switch name, used in the error message.</param>
    /// <exception cref="ArgumentException">The text is not an integer or is below <paramref name="min"/>.</exception>
    private static int ParseInt(string value, int min, string name)
    {
        if (!int.TryParse(value, NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsed) || parsed < min)
        {
            throw new ArgumentException($"{name} must be an integer >= {min}");
        }

        return parsed;
    }

    /// <summary>Writes the usage text to standard output.</summary>
    private static void PrintUsage()
    {
        Console.WriteLine("OpenClaw CLI WebSocket validator");
        Console.WriteLine();
        Console.WriteLine("Reads the same tray settings file and runs chat.send checks over gateway WebSocket.");
        Console.WriteLine();
        Console.WriteLine("Usage:");
        Console.WriteLine(" dotnet run --project src/OpenClaw.Cli -- [options]");
        Console.WriteLine();
        Console.WriteLine("Options:");
        Console.WriteLine(" --settings <path> Settings file (default: %APPDATA%\\OpenClawTray\\settings.json)");
        Console.WriteLine(" --url <ws://...> Override gateway URL");
        Console.WriteLine(" --token <token> Override token");
        Console.WriteLine(" --message <text> Message to send");
        Console.WriteLine(" --repeat <n> Number of sends (default: 1)");
        Console.WriteLine(" --delay-ms <n> Delay between sends (default: 500)");
        Console.WriteLine(" --connect-timeout-ms <n> Wait for Connected state (default: 10000)");
        Console.WriteLine(" --probe-read Request sessions/usage/nodes once");
        Console.WriteLine(" --verbose Enable shared client console logs");
        Console.WriteLine(" --help, -h Show this help");
    }
}
|
||||
@ -89,9 +89,10 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)'!='Debug'">
|
||||
<!-- In Release builds, trimming is enabled by default.
|
||||
feel free to disable this if needed -->
|
||||
<PublishTrimmed>true</PublishTrimmed>
|
||||
<!-- Trimming requires self-contained publish. Only enable when a RID is set
|
||||
(i.e. during `dotnet publish -r <rid>`), otherwise plain `dotnet build -c Release`
|
||||
fails with NETSDK1102. -->
|
||||
<PublishTrimmed Condition="'$(RuntimeIdentifier)' != ''">true</PublishTrimmed>
|
||||
|
||||
<!-- In release, also ignore the aforementioned ILLink warning -->
|
||||
<ILLinkTreatWarningsAsErrors>false</ILLinkTreatWarningsAsErrors>
|
||||
|
||||
@ -24,6 +24,26 @@ internal sealed partial class OpenClawPage : ListPage
|
||||
Title = "🦞 Open Dashboard",
|
||||
Subtitle = "Open OpenClaw web dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/sessions"))
|
||||
{
|
||||
Title = "💬 Dashboard: Sessions",
|
||||
Subtitle = "Open the sessions dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/channels"))
|
||||
{
|
||||
Title = "📡 Dashboard: Channels",
|
||||
Subtitle = "Open the channel configuration dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/skills"))
|
||||
{
|
||||
Title = "🧩 Dashboard: Skills",
|
||||
Subtitle = "Open the skills dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/cron"))
|
||||
{
|
||||
Title = "⏱️ Dashboard: Cron",
|
||||
Subtitle = "Open the scheduled jobs dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://chat"))
|
||||
{
|
||||
Title = "💬 Web Chat",
|
||||
@ -34,10 +54,110 @@ internal sealed partial class OpenClawPage : ListPage
|
||||
Title = "📝 Quick Send",
|
||||
Subtitle = "Send a message to OpenClaw"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://setup"))
|
||||
{
|
||||
Title = "🧭 Setup Wizard",
|
||||
Subtitle = "Open QR, setup code, and manual gateway pairing"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://commandcenter"))
|
||||
{
|
||||
Title = "🧭 Command Center",
|
||||
Subtitle = "Open gateway, tunnel, node, and browser diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://healthcheck"))
|
||||
{
|
||||
Title = "🔄 Run Health Check",
|
||||
Subtitle = "Refresh gateway or node connection health"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://check-updates"))
|
||||
{
|
||||
Title = "⬇️ Check for Updates",
|
||||
Subtitle = "Run a manual GitHub Releases update check"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://activity"))
|
||||
{
|
||||
Title = "⚡ Activity Stream",
|
||||
Subtitle = "Open recent tray activity and support bundle actions"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://history"))
|
||||
{
|
||||
Title = "📋 Notification History",
|
||||
Subtitle = "Open recent OpenClaw tray notifications"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://settings"))
|
||||
{
|
||||
Title = "⚙️ Settings",
|
||||
Subtitle = "Configure OpenClaw Tray"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://logs"))
|
||||
{
|
||||
Title = "📄 Open Log File",
|
||||
Subtitle = "Open the current OpenClaw Tray log"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://log-folder"))
|
||||
{
|
||||
Title = "📁 Open Logs Folder",
|
||||
Subtitle = "Open the OpenClaw Tray logs folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://config"))
|
||||
{
|
||||
Title = "🗂️ Open Config Folder",
|
||||
Subtitle = "Open the OpenClaw Tray configuration folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://diagnostics"))
|
||||
{
|
||||
Title = "🧪 Open Diagnostics Folder",
|
||||
Subtitle = "Open the OpenClaw Tray diagnostics JSONL folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://support-context"))
|
||||
{
|
||||
Title = "📋 Copy Support Context",
|
||||
Subtitle = "Copy redacted Command Center support metadata"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://debug-bundle"))
|
||||
{
|
||||
Title = "🧰 Copy Debug Bundle",
|
||||
Subtitle = "Copy support context plus port, capability, node, channel, and activity diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://browser-setup"))
|
||||
{
|
||||
Title = "🌐 Copy Browser Setup",
|
||||
Subtitle = "Copy browser.proxy and node-host setup guidance"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://port-diagnostics"))
|
||||
{
|
||||
Title = "🔌 Copy Port Diagnostics",
|
||||
Subtitle = "Copy gateway, browser proxy, tunnel ports, owners, and stop hints"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://capability-diagnostics"))
|
||||
{
|
||||
Title = "🛡️ Copy Capability Diagnostics",
|
||||
Subtitle = "Copy permissions, allowlist health, and parity diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://node-inventory"))
|
||||
{
|
||||
Title = "🖥️ Copy Node Inventory",
|
||||
Subtitle = "Copy connected node capabilities, commands, and policy status"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://channel-summary"))
|
||||
{
|
||||
Title = "📡 Copy Channel Summary",
|
||||
Subtitle = "Copy channel health and start/stop availability"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://activity-summary"))
|
||||
{
|
||||
Title = "⚡ Copy Activity Summary",
|
||||
Subtitle = "Copy recent tray activity for troubleshooting"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://extensibility-summary"))
|
||||
{
|
||||
Title = "🧩 Copy Extensibility Summary",
|
||||
Subtitle = "Copy channel, skills, and cron dashboard surface guidance"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://restart-ssh-tunnel"))
|
||||
{
|
||||
Title = "🔁 Restart SSH Tunnel",
|
||||
Subtitle = "Restart the tray-managed SSH tunnel when enabled"
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
70
src/OpenClaw.Shared/Audio/AudioModels.cs
Normal file
70
src/OpenClaw.Shared/Audio/AudioModels.cs
Normal file
@ -0,0 +1,70 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// One segment of speech-to-text output: the recognized text, its start and
/// end times, and a language code.
/// </summary>
public sealed class TranscriptionResult
{
    /// <summary>Recognized text for this segment. Defaults to the empty string.</summary>
    public string Text { get; init; } = string.Empty;

    /// <summary>Time at which the segment starts.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>Time at which the segment ends.</summary>
    public TimeSpan End { get; init; }

    /// <summary>Language code associated with the segment. Defaults to "en".</summary>
    public string Language { get; init; } = "en";
}
|
||||
|
||||
/// <summary>
/// Combined result for one silence-bounded utterance: every Whisper segment
/// produced from a single VAD-bounded speech burst, merged into one object.
/// Consumers that want "what the user said" (chat submission, stt.listen)
/// should use this rather than the per-segment <see cref="TranscriptionResult"/>
/// so they never forward partial text.
/// </summary>
public sealed class UtteranceResult
{
    /// <summary>Text of all segments joined together, single-spaced.</summary>
    public string Text { get; init; } = string.Empty;

    /// <summary>Language detected on the first segment; null when there are no segments.</summary>
    public string? Language { get; init; }

    /// <summary>Start of the first segment, relative to capture start.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>End of the last segment, relative to capture start.</summary>
    public TimeSpan End { get; init; }

    /// <summary>Immutable snapshot of the individual segment results. Defaults to an empty list.</summary>
    public IReadOnlyList<TranscriptionResult> Segments { get; init; } = Array.Empty<TranscriptionResult>();
}
|
||||
|
||||
/// <summary>
/// A single voice-activity detection event: the speaking flag together with
/// the probability value that accompanied it.
/// </summary>
public sealed class VadEvent
{
    /// <summary>Whether speech is considered to be in progress for this event.</summary>
    public bool IsSpeaking { get; init; }

    /// <summary>Speech probability reported with the event.</summary>
    public float Probability { get; init; }
}
|
||||
|
||||
/// <summary>
/// Immutable settings bag for the audio pipeline. All values are supplied
/// through object-initializer syntax; unset values fall back to the defaults
/// documented on each property.
/// </summary>
public sealed class AudioPipelineOptions
{
    /// <summary>Path to the Whisper GGML model file. Defaults to the empty string.</summary>
    public string ModelPath { get; init; } = string.Empty;

    /// <summary>Language code for STT (e.g. "en", "auto"). Defaults to "auto".</summary>
    public string Language { get; init; } = "auto";

    /// <summary>Seconds of silence before a speech segment is finalized. Defaults to 1.5.</summary>
    public float SilenceTimeoutSeconds { get; init; } = 1.5f;

    /// <summary>Optional audio device ID; null selects the system default microphone.</summary>
    public string? DeviceId { get; init; }

    /// <summary>
    /// VAD probability threshold (0.0–1.0); audio scoring above it is treated
    /// as speech. Defaults to 0.3.
    /// </summary>
    public float VadThreshold { get; init; } = 0.3f;
}
|
||||
|
||||
/// <summary>
/// States the audio pipeline can report. The numeric values are spelled out
/// and match the implicit declaration-order numbering.
/// </summary>
public enum AudioPipelineState
{
    Stopped = 0,
    Starting = 1,
    Listening = 2,
    Processing = 3,
    Error = 4,
}
|
||||
390
src/OpenClaw.Shared/Audio/PiperVoiceManager.cs
Normal file
390
src/OpenClaw.Shared/Audio/PiperVoiceManager.cs
Normal file
@ -0,0 +1,390 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Manages downloads and on-disk lifecycle for Piper TTS voices.
|
||||
///
|
||||
/// Each "voice" is a sherpa-onnx pre-packaged tarball that contains
|
||||
/// everything needed for offline synthesis — the .onnx model, the
|
||||
/// tokens.txt phoneme map, and the language-specific espeak-ng-data.
|
||||
/// We use the sherpa-onnx repackaged distribution rather than the raw
|
||||
/// HuggingFace Piper voices because the latter requires the user (or
|
||||
/// us) to ship espeak-ng-data separately (~80 MB shared across voices).
|
||||
///
|
||||
/// Storage layout under the tray's data directory:
|
||||
/// models/piper/<voice-id>/
|
||||
/// <voice-id>.onnx
|
||||
/// tokens.txt
|
||||
/// espeak-ng-data/...
|
||||
///
|
||||
/// Each voice is ~50 MB compressed, ~80 MB extracted (with espeak data).
|
||||
///
|
||||
/// Integrity: each downloaded tarball is checked against the SHA-256 pinned
|
||||
/// in <see cref="AvailableVoices"/> before extraction, and voices without a
|
||||
/// pinned hash are refused. Re-verifying the pins is tracked in Audio_FollowUps.md §2.
|
||||
/// </summary>
|
||||
public sealed class PiperVoiceManager
|
||||
{
|
||||
private readonly string _voicesDirectory;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
// Per-voice single-flight gate: prevents racing the same voice download
|
||||
// from two callers (e.g. UI and a programmatic caller). Static so two
|
||||
// PiperVoiceManager instances over the same data directory still
|
||||
// coalesce against the same in-flight task.
|
||||
private static readonly ConcurrentDictionary<string, Lazy<Task>> InFlightDownloads = new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
/// <summary>
|
||||
/// Curated catalog of Piper voices we offer in the UI. Each entry is
|
||||
/// a sherpa-onnx pre-packaged tarball from the project's GitHub
|
||||
/// releases. To add a voice: pick its key from
|
||||
/// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models,
|
||||
/// download the tarball, compute its SHA-256, and pin it below.
|
||||
/// Sizes shown in the UI are approximate compressed sizes.
|
||||
///
|
||||
/// SECURITY — pinned SHA-256 hashes (lowercase hex) verified against
|
||||
/// the sherpa-onnx GitHub release on 2026-05-05. Downloads with a
|
||||
/// different hash are rejected and the partial tarball is deleted.
|
||||
/// Before any public release: re-verify each hash from an independent
|
||||
/// source and document provenance in Audio_FollowUps.md §2.
|
||||
/// </summary>
|
||||
public static readonly PiperVoiceInfo[] AvailableVoices =
|
||||
[
|
||||
new("en_US-amy-low", "English (US) — Amy (low quality, fast)", "en-US",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2",
|
||||
"c70f5284a09a7fd4ed203b39b2ff51cac1432b422b852eb647b481dade3cf639"),
|
||||
new("en_US-libritts-high","English (US) — LibriTTS (high quality)", "en-US",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts-high.tar.bz2",
|
||||
"d9d35056703fd38ed38e95c202a50f603fefdc8a92a7b6332c4f1a41616eac72"),
|
||||
new("en_GB-alan-low", "English (GB) — Alan (low quality, fast)", "en-GB",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-alan-low.tar.bz2",
|
||||
"1308e730b7a12c3b64b669d65daa0138fcb83b1a086edee92fa9fa68cb0290dd"),
|
||||
new("fr_FR-siwis-low", "Français (FR) — Siwis (low quality, fast)","fr-FR",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-fr_FR-siwis-low.tar.bz2",
|
||||
"3d69170c160c8375c4123901a72a3845222b39456d39ab74f5bbd7310952b5af"),
|
||||
new("de_DE-thorsten-low","Deutsch (DE) — Thorsten (low quality)", "de-DE",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten-low.tar.bz2",
|
||||
"41fab35910fdcec4696b031951d8fd6c262e594cf77b35e1068fadbeb5a091a6"),
|
||||
new("zh_CN-huayan-medium","中文 (CN) — Huayan (medium quality)", "zh-CN",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-zh_CN-huayan-medium.tar.bz2",
|
||||
"dbdfec42b91d9cee31cce9ff4b3e9c305eb6fbf60546d071f7e46273554cce6b"),
|
||||
];
|
||||
|
||||
/// <summary>
/// Creates a manager whose voices live under <c>models/piper</c> inside
/// <paramref name="dataDirectory"/>, creating that directory if needed.
/// </summary>
/// <param name="dataDirectory">Root data directory that will contain <c>models/piper</c>.</param>
/// <param name="logger">Logger used for download and delete messages.</param>
public PiperVoiceManager(string dataDirectory, IOpenClawLogger logger)
{
    _logger = logger;

    var voicesRoot = Path.Combine(dataDirectory, "models", "piper");
    Directory.CreateDirectory(voicesRoot);
    _voicesDirectory = voicesRoot;
}
|
||||
|
||||
/// <summary>
/// Returns the directory that holds (or will hold) the files of the given
/// voice. The path is only computed here; nothing is created on disk.
/// </summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public string GetVoiceDirectory(string voiceId) =>
    Path.Combine(_voicesDirectory, FindVoice(voiceId).VoiceId);
|
||||
|
||||
/// <summary>Path to the .onnx model file for a downloaded voice.</summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <returns><c>&lt;voices dir&gt;/&lt;voice-id&gt;/&lt;voice-id&gt;.onnx</c>, using the catalog's spelling of the id.</returns>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public string GetModelPath(string voiceId)
{
    // FindVoice matches case-insensitively, so build both the directory and
    // the file name from the catalog's canonical id rather than the caller's
    // spelling; otherwise a differently-cased id would name a file that does
    // not exist on case-sensitive file systems.
    var canonicalId = FindVoice(voiceId).VoiceId;

    // sherpa-onnx tarballs put files at the root of the voice dir; the
    // model file is named after the voice id.
    return Path.Combine(_voicesDirectory, canonicalId, $"{canonicalId}.onnx");
}
|
||||
|
||||
/// <summary>Path to the voice's <c>tokens.txt</c> (phoneme map) inside its voice directory.</summary>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public string GetTokensPath(string voiceId)
{
    var voiceDir = GetVoiceDirectory(voiceId);
    return Path.Combine(voiceDir, "tokens.txt");
}
|
||||
|
||||
/// <summary>Path to the <c>espeak-ng-data</c> directory bundled inside the voice directory.</summary>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public string GetEspeakDataDir(string voiceId)
{
    var voiceDir = GetVoiceDirectory(voiceId);
    return Path.Combine(voiceDir, "espeak-ng-data");
}
|
||||
|
||||
/// <summary>
/// True when the model file, tokens.txt and the espeak-ng-data directory are
/// all present on disk for the given voice.
/// </summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <returns>False when any piece is missing or the id is not in the catalog.</returns>
public bool IsVoiceDownloaded(string voiceId)
{
    try
    {
        return File.Exists(GetModelPath(voiceId))
            && File.Exists(GetTokensPath(voiceId))
            && Directory.Exists(GetEspeakDataDir(voiceId));
    }
    catch (ArgumentException)
    {
        // FindVoice throws ArgumentException on an unknown voiceId — treat
        // that as not-downloaded. Anything else is unexpected and is allowed
        // to propagate instead of being silently swallowed.
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Downloads and extracts a Piper voice from the sherpa-onnx release.
/// Progress is reported as (bytes downloaded, total bytes); extraction has
/// no separate progress. Calls for the same voice share one in-flight
/// download through <c>SingleFlightDownload</c>, so only the call that
/// actually starts the download has its <paramref name="progress"/> used.
/// </summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <param name="progress">Optional progress sink.</param>
/// <param name="cancellationToken">Handed to <c>SingleFlightDownload.RunAsync</c> as the wait token.</param>
/// <returns>A completed task when the voice is already on disk; otherwise the shared download task.</returns>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public Task DownloadVoiceAsync(
    string voiceId,
    IProgress<(long downloaded, long total)>? progress = null,
    CancellationToken cancellationToken = default)
{
    var voice = FindVoice(voiceId);

    if (!IsVoiceDownloaded(voice.VoiceId))
    {
        // Fail before the 50-150 MB download if the OS cannot extract a
        // .tar.bz2: tar.exe ships with Windows 10 1803+, and older systems
        // would only fail at the extract step after a long, wasted download.
        EnsureExtractorAvailable();

        return SingleFlightDownload.RunAsync(
            InFlightDownloads,
            voice.VoiceId,
            downloadToken => DownloadVoiceCoreAsync(voice, progress, downloadToken),
            cancellationToken);
    }

    _logger.Info($"Piper voice '{voice.VoiceId}' already downloaded");
    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Performs the actual download: streams the tarball to a temp file inside
/// the voice directory, verifies its SHA-256 against the pinned hash,
/// extracts it with tar, and checks that the expected layout exists.
/// On any failure the temp tarball and the incomplete voice directory are
/// removed (best effort) and the original exception is rethrown.
/// </summary>
/// <param name="info">Catalog entry to download.</param>
/// <param name="progress">Optional sink for (bytes downloaded, total bytes); total is 0 when the server sends no Content-Length.</param>
/// <param name="cancellationToken">Cancels the HTTP transfer, hashing and extraction.</param>
/// <exception cref="InvalidOperationException">No pinned hash, or extraction did not produce the expected files.</exception>
/// <exception cref="System.Security.SecurityException">The tarball does not match the pinned SHA-256.</exception>
private async Task DownloadVoiceCoreAsync(
    PiperVoiceInfo info,
    IProgress<(long downloaded, long total)>? progress,
    CancellationToken cancellationToken)
{
    // SECURITY: refuse to install any voice that doesn't have a pinned
    // hash. See Audio_FollowUps.md §2.
    if (string.IsNullOrWhiteSpace(info.Sha256))
    {
        throw new InvalidOperationException(
            $"Piper voice '{info.VoiceId}' has no pinned SHA-256; refusing to download. " +
            "Add a verified hash to AvailableVoices before enabling this voice.");
    }

    var voiceDir = Path.Combine(_voicesDirectory, info.VoiceId);
    Directory.CreateDirectory(voiceDir);
    var tarballPath = Path.Combine(voiceDir, $"{info.VoiceId}.tar.bz2.tmp");
    _logger.Info($"Downloading Piper voice '{info.VoiceId}' from {info.DownloadUrl}");

    try
    {
        using var httpClient = new HttpClient();
        httpClient.Timeout = TimeSpan.FromMinutes(10);
        using var response = await httpClient
            .GetAsync(info.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken)
            .ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        var totalBytes = response.Content.Headers.ContentLength ?? 0;
        using (var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false))
        using (var fileStream = new FileStream(tarballPath, FileMode.Create, FileAccess.Write, FileShare.None, 81920))
        {
            var buffer = new byte[81920];
            long downloaded = 0;
            int bytesRead;
            while ((bytesRead = await contentStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
            {
                await fileStream.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken).ConfigureAwait(false);
                downloaded += bytesRead;
                progress?.Report((downloaded, totalBytes));
            }
        }

        // SECURITY: verify SHA-256 of the downloaded tarball BEFORE we hand
        // it to the extractor. An attacker-controlled tarball could plant
        // arbitrary files, and the .onnx model itself is loaded into the
        // process. Fail closed on mismatch — cleanup runs in the catch block.
        await VerifyHashAsync(tarballPath, info.Sha256, info.VoiceId, cancellationToken).ConfigureAwait(false);

        _logger.Info($"Extracting Piper voice '{info.VoiceId}'");
        ExtractTarBz2(tarballPath, voiceDir, cancellationToken);

        // Verify the extraction produced the files we expect; if not, the
        // catch block tears the half-extracted dir down so a retry starts clean.
        if (!IsVoiceDownloaded(info.VoiceId))
        {
            throw new InvalidOperationException(
                $"Extraction of Piper voice '{info.VoiceId}' did not produce the expected layout.");
        }

        _logger.Info($"Piper voice '{info.VoiceId}' verified and ready at {voiceDir}");
    }
    catch
    {
        // Best-effort cleanup so a retry does not see leftover partial files.
        // The temp tarball lives inside voiceDir, so the recursive delete
        // covers it; the finally block handles the case where the directory
        // is kept.
        try
        {
            if (Directory.Exists(voiceDir) && !IsVoiceDownloaded(info.VoiceId))
            {
                Directory.Delete(voiceDir, recursive: true);
            }
        }
        catch { /* swallow: cleanup must not mask the original failure */ }
        throw;
    }
    finally
    {
        // The tarball is only a transport artifact; remove it on success and failure alike.
        try { if (File.Exists(tarballPath)) File.Delete(tarballPath); } catch { /* swallow */ }
    }
}
|
||||
|
||||
/// <summary>
/// Computes the SHA-256 of <paramref name="filePath"/> and compares it,
/// case-insensitively, to <paramref name="expectedHex"/>. Throws on mismatch
/// (the caller is expected to delete the file). The actual hash is never
/// included in the error, to avoid handing attackers a confirmation oracle.
/// </summary>
/// <param name="filePath">File to hash.</param>
/// <param name="expectedHex">Pinned hash as a hex string.</param>
/// <param name="assetName">Name used in the error message.</param>
/// <param name="cancellationToken">Cancels the hashing read.</param>
/// <exception cref="System.Security.SecurityException">The hash does not match.</exception>
private static async Task VerifyHashAsync(string filePath, string expectedHex, string assetName, CancellationToken cancellationToken)
{
    byte[] actual;
    using (var sha = System.Security.Cryptography.SHA256.Create())
    {
        var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, useAsync: true);
        // Library code: do not capture the caller's synchronization context
        // for either the hashing reads or the async dispose.
        await using (stream.ConfigureAwait(false))
        {
            actual = await sha.ComputeHashAsync(stream, cancellationToken).ConfigureAwait(false);
        }
    }

    // The comparison ignores case, so no lower-casing pass is needed.
    var actualHex = Convert.ToHexString(actual);
    if (!string.Equals(actualHex, expectedHex, StringComparison.OrdinalIgnoreCase))
    {
        throw new System.Security.SecurityException(
            $"Piper voice '{assetName}' failed integrity check. The downloaded tarball does not match the pinned SHA-256.");
    }
}
|
||||
|
||||
/// <summary>Removes a voice's directory and everything in it.</summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <returns>True when a directory existed and was deleted; false when there was nothing to delete.</returns>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public bool DeleteVoice(string voiceId)
{
    var voice = FindVoice(voiceId);
    var target = Path.Combine(_voicesDirectory, voice.VoiceId);

    if (Directory.Exists(target))
    {
        Directory.Delete(target, recursive: true);
        _logger.Info($"Deleted Piper voice '{voice.VoiceId}'");
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Sums the sizes of all files under the voice's directory, recursively.
/// </summary>
/// <param name="voiceId">Catalog voice id, matched case-insensitively.</param>
/// <returns>Total bytes on disk, or 0 when the voice directory does not exist.</returns>
/// <exception cref="ArgumentException">The id is not in the catalog.</exception>
public long GetVoiceSize(string voiceId)
{
    var voiceDir = Path.Combine(_voicesDirectory, FindVoice(voiceId).VoiceId);

    long bytes = 0;
    if (Directory.Exists(voiceDir))
    {
        foreach (var path in Directory.EnumerateFiles(voiceDir, "*", SearchOption.AllDirectories))
        {
            // A file whose length cannot be read is simply left out of the total.
            try
            {
                bytes += new FileInfo(path).Length;
            }
            catch
            {
                /* skip */
            }
        }
    }

    return bytes;
}
|
||||
|
||||
/// <summary>
/// Checks that the <c>tar</c> used by <see cref="ExtractTarBz2"/> can be
/// started, by running <c>tar --version</c> and waiting up to two seconds.
/// Called before any network I/O so a missing extractor is reported without
/// leaving a half-downloaded tarball behind.
/// </summary>
/// <exception cref="InvalidOperationException">
/// tar could not be started, did not exit within two seconds, or exited non-zero.
/// </exception>
private static void EnsureExtractorAvailable()
{
    var probe = new System.Diagnostics.ProcessStartInfo
    {
        FileName = "tar",
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
    };
    probe.ArgumentList.Add("--version");

    try
    {
        using var tar = System.Diagnostics.Process.Start(probe);
        if (tar is null)
        {
            throw new InvalidOperationException("tar.exe not found on PATH.");
        }

        tar.WaitForExit(2000);

        if (tar.HasExited)
        {
            if (tar.ExitCode != 0)
            {
                throw new InvalidOperationException($"tar.exe --version returned exit code {tar.ExitCode}.");
            }

            return;
        }

        // Still running after the grace period: stop it and report the hang.
        try
        {
            tar.Kill(entireProcessTree: true);
        }
        catch
        {
            /* swallow */
        }

        throw new InvalidOperationException("tar.exe didn't respond to --version.");
    }
    catch (System.ComponentModel.Win32Exception ex)
    {
        // Win32Exception from this block is reported as "tar is missing".
        throw new InvalidOperationException(
            "Piper voices need bundled tar (Windows 10 1803+). " +
            "Your system doesn't have tar on PATH; please update Windows or install a tar utility.", ex);
    }
}
|
||||
|
||||
/// <summary>
/// Extracts a .tar.bz2 archive into <paramref name="destinationDir"/> by
/// running the OS-provided <c>tar</c> with <c>--strip-components=1</c>, so
/// the archive's top-level folder is dropped and its contents land directly
/// in the destination.
/// </summary>
/// <param name="archivePath">Path of the .tar.bz2 file.</param>
/// <param name="destinationDir">Existing directory to extract into.</param>
/// <param name="cancellationToken">When signalled, the tar process tree is killed.</param>
/// <exception cref="InvalidOperationException">tar could not be started or exited non-zero.</exception>
/// <exception cref="OperationCanceledException">tar exited non-zero after cancellation was requested.</exception>
private static void ExtractTarBz2(string archivePath, string destinationDir, CancellationToken cancellationToken)
{
    // bzip2 is not in the BCL (System.Formats.Tar only covers the tar layer)
    // and this assembly keeps its dependency surface small, so the
    // simplest-correct path is the OS-bundled `tar` (Win10 1803+ ships it),
    // which transparently handles bz2.
    var psi = new System.Diagnostics.ProcessStartInfo
    {
        FileName = "tar",
        ArgumentList = { "-xjf", archivePath, "-C", destinationDir, "--strip-components=1" },
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardError = true,
    };
    using var proc = System.Diagnostics.Process.Start(psi)
        ?? throw new InvalidOperationException("Could not start tar to extract Piper voice");

    // Cancellation: kill the tar process if requested.
    using var reg = cancellationToken.Register(() => { try { proc.Kill(entireProcessTree: true); } catch { /* swallow */ } });

    // Drain stderr BEFORE waiting for exit. With a redirected stream, waiting
    // first can deadlock: tar blocks once the pipe buffer is full while we
    // block waiting for tar. ReadToEnd returns when tar closes the stream,
    // i.e. when it exits or is killed.
    var err = proc.StandardError.ReadToEnd();
    proc.WaitForExit();

    if (proc.ExitCode != 0)
    {
        // A non-zero exit caused by our own kill is a cancellation, not an
        // extraction failure.
        cancellationToken.ThrowIfCancellationRequested();
        throw new InvalidOperationException($"tar extraction failed (exit {proc.ExitCode}): {err}");
    }
}
|
||||
|
||||
/// <summary>
/// Looks up a catalog entry by voice id, ignoring case.
/// </summary>
/// <param name="voiceId">Id to look for in <c>AvailableVoices</c>.</param>
/// <returns>The first catalog entry whose id matches.</returns>
/// <exception cref="ArgumentException">No entry matches; the message lists the available ids.</exception>
private static PiperVoiceInfo FindVoice(string voiceId)
{
    var match = Array.Find(
        AvailableVoices,
        candidate => string.Equals(candidate.VoiceId, voiceId, StringComparison.OrdinalIgnoreCase));

    if (match is not null)
    {
        return match;
    }

    var available = string.Join(", ", AvailableVoicesIds());
    throw new ArgumentException($"Unknown Piper voice: '{voiceId}'. Available: {available}");
}
|
||||
|
||||
/// <summary>Lazily yields the id of every catalog entry, in catalog order.</summary>
private static IEnumerable<string> AvailableVoicesIds()
{
    for (var index = 0; index < AvailableVoices.Length; index++)
    {
        yield return AvailableVoices[index].VoiceId;
    }
}
|
||||
}
|
||||
|
||||
/// <summary>Catalog entry describing one downloadable Piper voice.</summary>
/// <param name="VoiceId">Short id, e.g. "en_US-amy-low"; also used as the on-disk folder name.</param>
/// <param name="DisplayName">Human-readable label for the UI.</param>
/// <param name="LanguageTag">BCP-47 language tag, e.g. "en-US".</param>
/// <param name="DownloadUrl">HTTPS URL of the voice's .tar.bz2 archive.</param>
/// <param name="Sha256">Pinned lowercase hex SHA-256 of the archive. Must be
/// set: the downloader refuses a voice whose hash is null or blank. The
/// pinned values need re-verification before any public release (see
/// Audio_FollowUps.md §2).</param>
public sealed record PiperVoiceInfo(
    string VoiceId,
    string DisplayName,
    string LanguageTag,
    string DownloadUrl,
    string? Sha256);
|
||||
28
src/OpenClaw.Shared/Audio/SileroVadModelManifest.cs
Normal file
28
src/OpenClaw.Shared/Audio/SileroVadModelManifest.cs
Normal file
@ -0,0 +1,28 @@
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Fixed description of the Silero VAD ONNX model that the audio pipeline
/// downloads automatically on first use: file name, source URL, pinned
/// hash and an approximate size.
///
/// SECURITY — follows the same fail-closed discipline as
/// <see cref="WhisperModelManager"/> and <see cref="PiperVoiceManager"/>:
/// the runtime compares the downloaded file's SHA-256 with
/// <see cref="Sha256"/> before installing it. The pinned hash was captured
/// from the upstream raw URL on 2026-05-05; re-verify it from an independent
/// source before any public release (Audio_FollowUps.md §2 covers the
/// broader signed-manifest plan).
/// </summary>
public static class SileroVadModelManifest
{
    /// <summary>Name of the model file.</summary>
    public const string FileName = "silero_vad.onnx";

    /// <summary>Upstream URL the model is fetched from.</summary>
    public const string DownloadUrl = "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx";

    /// <summary>Lowercase hex SHA-256 of the canonical upstream file.</summary>
    public const string Sha256 = "1a153a22f4509e292a94e67d6f9b85e8deb25b4988682b7e174c65279d8788e3";

    /// <summary>
    /// Approximate download size in bytes. This is only a UI hint; the real
    /// content is asserted through the SHA-256 check.
    /// </summary>
    public const long ApproximateSizeBytes = 2_327_524;
}
|
||||
52
src/OpenClaw.Shared/Audio/SingleFlightDownload.cs
Normal file
52
src/OpenClaw.Shared/Audio/SingleFlightDownload.cs
Normal file
@ -0,0 +1,52 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Coalesces concurrent downloads that share a key onto one task.
/// </summary>
internal static class SingleFlightDownload
{
    /// <summary>
    /// Returns the task for <paramref name="key"/>, starting it through
    /// <paramref name="startDownload"/> only if no entry for that key is
    /// currently registered in <paramref name="inFlight"/>. The entry is
    /// removed again once the task completes.
    /// </summary>
    /// <param name="inFlight">Registry of running downloads, keyed by asset.</param>
    /// <param name="key">Key identifying the asset.</param>
    /// <param name="startDownload">
    /// Factory for the download task. It is always invoked with
    /// <see cref="CancellationToken.None"/>; a synchronous exception or a
    /// null return is turned into a faulted task.
    /// </param>
    /// <param name="waitCancellationToken">
    /// Cancels only this caller's wait on the shared task, not the download.
    /// </param>
    /// <returns>
    /// The shared task itself when the token cannot be cancelled, otherwise a
    /// wrapper that also completes when the token is cancelled.
    /// </returns>
    public static Task RunAsync(
        ConcurrentDictionary<string, Lazy<Task>> inFlight,
        string key,
        Func<CancellationToken, Task> startDownload,
        CancellationToken waitCancellationToken = default)
    {
        // Runs at most once per registered entry (ExecutionAndPublication).
        Task StartShared()
        {
            try
            {
                var started = startDownload(CancellationToken.None);
                if (started is null)
                {
                    return Task.FromException(new InvalidOperationException("Download factory returned null."));
                }

                return started;
            }
            catch (Exception failure)
            {
                return Task.FromException(failure);
            }
        }

        var entry = inFlight.GetOrAdd(
            key,
            new Lazy<Task>(StartShared, LazyThreadSafetyMode.ExecutionAndPublication));

        // Removal is by key AND value, so a newer entry registered under the
        // same key is never evicted by an older task's cleanup.
        var registration = new KeyValuePair<string, Lazy<Task>>(key, entry);

        Task shared;
        try
        {
            shared = entry.Value;
        }
        catch
        {
            inFlight.TryRemove(registration);
            throw;
        }

        _ = shared.ContinueWith(
            _ => inFlight.TryRemove(registration),
            CancellationToken.None,
            TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);

        if (!waitCancellationToken.CanBeCanceled)
        {
            return shared;
        }

        return shared.WaitAsync(waitCancellationToken);
    }
}
|
||||
182
src/OpenClaw.Shared/Audio/SpeechToTextService.cs
Normal file
182
src/OpenClaw.Shared/Audio/SpeechToTextService.cs
Normal file
@ -0,0 +1,182 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Whisper.net;
|
||||
using Whisper.net.Ggml;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps Whisper.net for speech-to-text transcription.
|
||||
/// Lazily loads the model on first use and caches the factory.
|
||||
/// Thread-safe: concurrent calls are serialized by a semaphore.
|
||||
/// </summary>
|
||||
public sealed class SpeechToTextService : IDisposable
|
||||
{
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly SemaphoreSlim _gate = new(1, 1);
|
||||
private WhisperFactory? _factory;
|
||||
private string? _loadedModelPath;
|
||||
|
||||
public bool IsModelLoaded => _factory != null;
|
||||
public string? LoadedModelPath => _loadedModelPath;
|
||||
|
||||
/// <summary>Creates the service without loading a model; only the logger is stored.</summary>
/// <param name="logger">Logger used for model load/unload messages.</param>
public SpeechToTextService(IOpenClawLogger logger) => _logger = logger;
|
||||
|
||||
/// <summary>
/// Load (or reload) the Whisper model from disk. Any previously loaded
/// model is released first.
/// </summary>
/// <param name="modelPath">Path to the model file.</param>
/// <exception cref="System.IO.FileNotFoundException">The file does not exist.</exception>
/// <remarks>
/// If loading the new model throws, the service is left with no model loaded
/// rather than holding a reference to the already-disposed previous factory.
/// NOTE(review): this method does not take the transcription gate — confirm
/// callers never load or unload while a transcription is running.
/// </remarks>
public void LoadModel(string modelPath)
{
    if (!System.IO.File.Exists(modelPath))
        throw new System.IO.FileNotFoundException($"Whisper model not found: {modelPath}");

    // Clear the fields before disposing so a failure in Dispose or FromPath
    // cannot leave _factory pointing at a disposed instance.
    var previous = _factory;
    _factory = null;
    _loadedModelPath = null;
    previous?.Dispose();

    _factory = WhisperFactory.FromPath(modelPath);
    _loadedModelPath = modelPath;
    _logger.Info($"Whisper model loaded: {modelPath}");
}
|
||||
|
||||
/// <summary>
/// Disposes the loaded Whisper factory (if any), clears the remembered model
/// path, and logs the unload. Safe to call when nothing is loaded.
/// </summary>
public void UnloadModel()
{
    if (_factory is not null)
    {
        _factory.Dispose();
    }

    _loadedModelPath = null;
    _factory = null;

    _logger.Info("Whisper model unloaded");
}
|
||||
|
||||
/// <summary>
/// Transcribe raw 16 kHz mono PCM float samples and return every non-empty
/// segment Whisper produces. Calls are serialized through the internal gate.
/// </summary>
/// <param name="samples">PCM samples to transcribe.</param>
/// <param name="language">"auto" or a language tag; normalized before being passed to Whisper.</param>
/// <param name="cancellationToken">Cancels waiting for the gate and the transcription itself.</param>
/// <returns>Segments with trimmed, non-empty text, in the order Whisper emitted them.</returns>
/// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
/// <exception cref="InvalidOperationException">No model is loaded.</exception>
public async Task<List<TranscriptionResult>> TranscribeAsync(
    float[] samples,
    string language = "auto",
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(samples);

    // Fail fast without queueing behind other transcriptions.
    if (_factory == null)
        throw new InvalidOperationException("No Whisper model is loaded. Call LoadModel first.");

    await _gate.WaitAsync(cancellationToken).ConfigureAwait(false);
    try
    {
        // Re-read the field now that we hold the gate: the model may have been
        // unloaded while this call was waiting, and dereferencing the field
        // directly would then throw NullReferenceException.
        var factory = _factory
            ?? throw new InvalidOperationException("No Whisper model is loaded. Call LoadModel first.");

        // Whisper.net's WithLanguage expects either "auto" or a 2-letter
        // ISO 639-1 code. The capability validator accepts the broader
        // BCP-47 shape ("en-US", "zh-Hans-CN") because that's what the
        // public docs advertise; normalize down here so Whisper actually
        // sees something it understands.
        var whisperLang = NormalizeForWhisper(language);
        var builder = factory.CreateBuilder()
            .WithLanguage(whisperLang)
            .WithThreads(Math.Max(1, Environment.ProcessorCount / 2));

        using var processor = builder.Build();
        using var wavStream = PcmToWavStream(samples, 16000);

        var results = new List<TranscriptionResult>();
        await foreach (var segment in processor.ProcessAsync(wavStream, cancellationToken).ConfigureAwait(false))
        {
            var text = segment.Text?.Trim();
            if (!string.IsNullOrEmpty(text))
            {
                results.Add(new TranscriptionResult
                {
                    Text = text,
                    Start = segment.Start,
                    End = segment.End,
                    // The requested (normalized) language, not a per-segment detection.
                    Language = whisperLang
                });
            }
        }

        return results;
    }
    finally
    {
        _gate.Release();
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Convert raw 16-bit PCM float samples to a WAV MemoryStream.
|
||||
/// Whisper.net processes WAV streams natively.
|
||||
/// </summary>
|
||||
private static System.IO.MemoryStream PcmToWavStream(float[] samples, int sampleRate)
|
||||
{
|
||||
var ms = new System.IO.MemoryStream();
|
||||
using var writer = new System.IO.BinaryWriter(ms, System.Text.Encoding.UTF8, leaveOpen: true);
|
||||
|
||||
int bitsPerSample = 16;
|
||||
short channels = 1;
|
||||
int byteRate = sampleRate * channels * bitsPerSample / 8;
|
||||
short blockAlign = (short)(channels * bitsPerSample / 8);
|
||||
int dataSize = samples.Length * blockAlign;
|
||||
|
||||
// RIFF header
|
||||
writer.Write("RIFF"u8);
|
||||
writer.Write(36 + dataSize);
|
||||
writer.Write("WAVE"u8);
|
||||
|
||||
// fmt subchunk
|
||||
writer.Write("fmt "u8);
|
||||
writer.Write(16); // subchunk size
|
||||
writer.Write((short)1); // PCM format
|
||||
writer.Write(channels);
|
||||
writer.Write(sampleRate);
|
||||
writer.Write(byteRate);
|
||||
writer.Write(blockAlign);
|
||||
writer.Write((short)bitsPerSample);
|
||||
|
||||
// data subchunk
|
||||
writer.Write("data"u8);
|
||||
writer.Write(dataSize);
|
||||
|
||||
// Convert float [-1.0, 1.0] to int16
|
||||
foreach (var sample in samples)
|
||||
{
|
||||
var clamped = Math.Clamp(sample, -1.0f, 1.0f);
|
||||
var int16 = (short)(clamped * 32767);
|
||||
writer.Write(int16);
|
||||
}
|
||||
|
||||
writer.Flush();
|
||||
ms.Position = 0;
|
||||
return ms;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reduce a BCP-47 tag (e.g. "en-US", "zh-Hans-CN") to the 2-letter
|
||||
/// language subtag that Whisper.net's WithLanguage call expects.
|
||||
/// "auto" passes through unchanged. Returns "auto" for nulls/whitespace
|
||||
/// or values that don't begin with at least 2 ASCII letters.
|
||||
/// </summary>
|
||||
internal static string NormalizeForWhisper(string? language)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(language)) return "auto";
|
||||
var trimmed = language.Trim();
|
||||
if (string.Equals(trimmed, "auto", StringComparison.OrdinalIgnoreCase)) return "auto";
|
||||
|
||||
// Take everything up to the first '-' (the primary subtag) and lowercase.
|
||||
var dash = trimmed.IndexOf('-');
|
||||
var primary = (dash >= 0 ? trimmed[..dash] : trimmed).ToLowerInvariant();
|
||||
|
||||
// Whisper expects 2-letter ISO 639-1. If the caller handed us a
|
||||
// 3-letter ISO 639-3 tag (no good cross-walk without a table) or
|
||||
// garbage, fall back to auto-detection rather than silently
|
||||
// sending an invalid value.
|
||||
if (primary.Length != 2 || primary[0] is < 'a' or > 'z' || primary[1] is < 'a' or > 'z')
|
||||
return "auto";
|
||||
|
||||
return primary;
|
||||
}
|
||||
|
||||
public void Dispose()
|
||||
{
|
||||
_factory?.Dispose();
|
||||
_gate.Dispose();
|
||||
}
|
||||
}
|
||||
108
src/OpenClaw.Shared/Audio/VoiceActivityDetector.cs
Normal file
108
src/OpenClaw.Shared/Audio/VoiceActivityDetector.cs
Normal file
@ -0,0 +1,108 @@
|
||||
using System;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Voice Activity Detection using Silero VAD ONNX model.
/// Processes 16 kHz mono audio in 512-sample chunks (~32 ms each)
/// and returns a speech probability per chunk.
/// </summary>
public sealed class VoiceActivityDetector : IDisposable
{
    private InferenceSession? _session;
    private float[] _state; // internal RNN state: shape [2, 1, 128]
    private readonly int _stateSize;
    private readonly IOpenClawLogger _logger;

    /// <summary>Expected sample rate for input audio.</summary>
    public const int SampleRate = 16000;

    /// <summary>Number of samples per VAD chunk (512 @ 16 kHz = 32 ms).</summary>
    public const int ChunkSamples = 512;

    /// <summary>True while an inference session is held by this detector.</summary>
    public bool IsLoaded => _session != null;

    public VoiceActivityDetector(IOpenClawLogger logger)
    {
        _logger = logger;
        _stateSize = 2 * 1 * 128;
        _state = new float[_stateSize];
    }

    /// <summary>Load the Silero VAD ONNX model from disk, replacing any previously loaded one.</summary>
    /// <param name="modelPath">Path of the ONNX model file.</param>
    /// <exception cref="System.IO.FileNotFoundException">The file does not exist.</exception>
    public void LoadModel(string modelPath)
    {
        if (!System.IO.File.Exists(modelPath))
            throw new System.IO.FileNotFoundException($"VAD model not found: {modelPath}");

        // SessionOptions is disposable; it is only needed while the session
        // is being constructed.
        using var opts = new SessionOptions
        {
            InterOpNumThreads = 1,
            IntraOpNumThreads = 1,
            EnableCpuMemArena = true,
            GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL
        };

        // Build the new session before touching the old one, so a failed load
        // keeps the previous (still valid) session instead of leaving a
        // disposed instance in _session.
        var replacement = new InferenceSession(modelPath, opts);
        var previous = _session;
        _session = replacement;
        previous?.Dispose();

        ResetState();
        _logger.Info($"Silero VAD model loaded: {modelPath}");
    }

    /// <summary>Reset the internal RNN state (call between utterances).</summary>
    public void ResetState()
    {
        Array.Clear(_state, 0, _state.Length);
    }

    /// <summary>
    /// Process a single chunk of audio and return the speech probability (0.0–1.0).
    /// Input must be exactly <see cref="ChunkSamples"/> float samples at 16 kHz.
    /// Returns 0 when the model produces no "output" value.
    /// </summary>
    /// <exception cref="InvalidOperationException">No model is loaded.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="audioChunk"/> is null.</exception>
    /// <exception cref="ArgumentException">The chunk has the wrong length.</exception>
    public float ProcessChunk(float[] audioChunk)
    {
        if (_session == null)
            throw new InvalidOperationException("VAD model not loaded. Call LoadModel first.");

        ArgumentNullException.ThrowIfNull(audioChunk);

        if (audioChunk.Length != ChunkSamples)
            throw new ArgumentException($"Audio chunk must be exactly {ChunkSamples} samples, got {audioChunk.Length}");

        // Build input tensors matching Silero VAD v5 expected shapes.
        // See: github.com/snakers4/silero-vad/blob/master/examples/csharp/SileroVadOnnxModel.cs
        var inputTensor = new DenseTensor<float>(audioChunk, new[] { 1, ChunkSamples });
        var srTensor = new DenseTensor<long>(new long[] { SampleRate }, new[] { 1 });
        var stateTensor = new DenseTensor<float>(_state, new[] { 2, 1, 128 });

        using var results = _session.Run(new List<NamedOnnxValue>
        {
            NamedOnnxValue.CreateFromTensor("input", inputTensor),
            NamedOnnxValue.CreateFromTensor("sr", srTensor),
            NamedOnnxValue.CreateFromTensor("state", stateTensor)
        });

        float probability = 0f;
        foreach (var result in results)
        {
            if (result.Name == "output")
            {
                var tensor = result.AsTensor<float>();
                probability = tensor.Length > 0 ? tensor.GetValue(0) : 0f;
            }
            else if (result.Name == "stateN")
            {
                // Carry the recurrent state over to the next chunk.
                var newState = result.AsTensor<float>();
                for (int i = 0; i < _stateSize && i < newState.Length; i++)
                    _state[i] = newState.GetValue(i);
            }
        }

        return probability;
    }

    /// <summary>Releases the inference session, if any.</summary>
    public void Dispose()
    {
        _session?.Dispose();
        // Clear the field so IsLoaded is false and ProcessChunk reports
        // "not loaded" instead of using a disposed session.
        _session = null;
    }
}
|
||||
223
src/OpenClaw.Shared/Audio/WhisperModelManager.cs
Normal file
223
src/OpenClaw.Shared/Audio/WhisperModelManager.cs
Normal file
@ -0,0 +1,223 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Manages Whisper GGML model downloads, storage, and lifecycle.
/// Models are stored in <c>%APPDATA%\OpenClawTray\models\</c> (or the
/// configured data directory).
/// </summary>
public sealed class WhisperModelManager
{
    private readonly string _modelsDirectory;
    private readonly IOpenClawLogger _logger;

    // Single-flight gate per destination file: a manual auto-download
    // (VoiceService EnsureInitializedAsync) and a UI-triggered download for
    // the same model would otherwise both write the same .tmp file. Static so
    // an additional manager instance constructed elsewhere (e.g. the Settings
    // page's status-only check) doesn't bypass the lock.
    private static readonly ConcurrentDictionary<string, Lazy<Task>> InFlightDownloads = new(StringComparer.OrdinalIgnoreCase);

    /// <summary>
    /// Known Whisper model definitions.
    ///
    /// SECURITY — pinned SHA-256 hashes (lowercase hex) verified against
    /// HuggingFace on 2026-05-05. Downloads with a different hash are
    /// rejected and the partial file is deleted. Before any public release:
    /// re-verify each hash from an independent source and document the
    /// provenance in Audio_FollowUps.md §2 (also consider replacing this
    /// inline table with a signed manifest).
    /// </summary>
    public static readonly WhisperModelInfo[] AvailableModels =
    [
        new("ggml-tiny.bin", "tiny", 77_691_713, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin",
            "be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21"),
        new("ggml-base.bin", "base", 147_951_465, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin",
            "60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"),
        new("ggml-small.bin", "small", 487_601_967, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
            "1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b"),
    ];

    /// <summary>
    /// Creates the manager and ensures the <c>models</c> sub-directory of
    /// <paramref name="dataDirectory"/> exists.
    /// </summary>
    public WhisperModelManager(string dataDirectory, IOpenClawLogger logger)
    {
        _modelsDirectory = Path.Combine(dataDirectory, "models");
        _logger = logger;
        Directory.CreateDirectory(_modelsDirectory);
    }

    /// <summary>Full file path for a given model name.</summary>
    /// <exception cref="ArgumentException">The model name is not in <see cref="AvailableModels"/>.</exception>
    public string GetModelPath(string modelName)
    {
        var info = FindModel(modelName);
        return Path.Combine(_modelsDirectory, info.FileName);
    }

    /// <summary>Check whether a model file already exists on disk.</summary>
    public bool IsModelDownloaded(string modelName)
    {
        var path = GetModelPath(modelName);
        return File.Exists(path);
    }

    /// <summary>Get the size of a downloaded model, or 0 if not downloaded.</summary>
    public long GetModelSize(string modelName)
    {
        // One FileInfo snapshot answers both questions, so the file cannot
        // disappear between an existence check and the length read.
        var file = new FileInfo(GetModelPath(modelName));
        return file.Exists ? file.Length : 0;
    }

    /// <summary>
    /// Download a model from HuggingFace if not already present.
    /// Reports progress as bytes downloaded / total bytes.
    /// Single-flight per destination file: concurrent calls for the same file
    /// await the in-flight download instead of racing on the same .tmp file.
    /// </summary>
    /// <exception cref="ArgumentException">The model name is not in <see cref="AvailableModels"/>.</exception>
    public Task DownloadModelAsync(
        string modelName,
        IProgress<(long downloaded, long total)>? progress = null,
        CancellationToken cancellationToken = default)
    {
        var info = FindModel(modelName);
        var destPath = Path.Combine(_modelsDirectory, info.FileName);

        if (File.Exists(destPath))
        {
            _logger.Info($"Model '{modelName}' already exists at {destPath}");
            return Task.CompletedTask;
        }

        // Key on the full destination path rather than the bare file name:
        // the map is static, so managers rooted at different data directories
        // must not coalesce (the second caller would otherwise await a
        // download that writes into someone else's directory and never get
        // its own file). Callers targeting the same file still share one
        // download.
        var key = Path.GetFullPath(destPath);
        return SingleFlightDownload.RunAsync(
            InFlightDownloads,
            key,
            token => DownloadModelCoreAsync(info, destPath, progress, token),
            cancellationToken);
    }

    /// <summary>
    /// Streams the model to <c>destPath + ".tmp"</c>, verifies its SHA-256 and
    /// then moves it to <paramref name="destPath"/>. The temp file is deleted
    /// (best effort) on any failure, and the exception is rethrown.
    /// </summary>
    private async Task DownloadModelCoreAsync(
        WhisperModelInfo info,
        string destPath,
        IProgress<(long downloaded, long total)>? progress,
        CancellationToken cancellationToken)
    {
        // SECURITY: a missing pinned hash is treated as a hard failure so we
        // never install an unverified asset. The catalog above pins all
        // shipped models; if you add a new one without a hash, this is the
        // place that refuses to download it. See Audio_FollowUps.md §2.
        if (string.IsNullOrWhiteSpace(info.Sha256))
        {
            throw new InvalidOperationException(
                $"Whisper model '{info.Name}' has no pinned SHA-256; refusing to download. " +
                "Add a verified hash to AvailableModels before enabling this model.");
        }

        _logger.Info($"Downloading model '{info.Name}' from {info.DownloadUrl}");
        var tempPath = destPath + ".tmp";

        try
        {
            using var httpClient = new HttpClient();
            httpClient.Timeout = TimeSpan.FromMinutes(30);
            using var response = await httpClient.GetAsync(info.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
            response.EnsureSuccessStatusCode();

            var totalBytes = response.Content.Headers.ContentLength ?? info.ApproximateSizeBytes;

            // Explicit scope: both streams must be closed before the file is
            // re-opened for hashing and then moved.
            {
                await using var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken);
                await using var fileStream = new FileStream(
                    tempPath, FileMode.Create, FileAccess.Write, FileShare.None, 81920, useAsync: true);

                var buffer = new byte[81920];
                long downloadedBytes = 0;
                int bytesRead;

                while ((bytesRead = await contentStream.ReadAsync(buffer, cancellationToken)) > 0)
                {
                    await fileStream.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken);
                    downloadedBytes += bytesRead;
                    progress?.Report((downloadedBytes, totalBytes));
                }

                await fileStream.FlushAsync(cancellationToken);
            }

            // SECURITY: verify SHA-256 BEFORE the atomic rename, so a
            // tampered file never lands at the canonical path. On mismatch
            // we delete the temp file (no partial install) and surface a
            // sanitized error — we deliberately do NOT echo the actual
            // hash because that gives an attacker a confirmation oracle.
            await VerifyHashAsync(tempPath, info.Sha256, info.Name, cancellationToken);

            File.Move(tempPath, destPath, overwrite: true);
            _logger.Info($"Model '{info.Name}' downloaded and verified");
        }
        catch
        {
            // Clean up partial download
            try { if (File.Exists(tempPath)) File.Delete(tempPath); } catch { /* best effort */ }
            throw;
        }
    }

    /// <summary>
    /// Compute SHA-256 of <paramref name="filePath"/> and compare to
    /// <paramref name="expectedHex"/> (case-insensitive). Throws on mismatch
    /// (and the caller is expected to delete the file). Does not echo the
    /// actual hash to avoid handing attackers a confirmation oracle.
    /// </summary>
    /// <exception cref="System.Security.SecurityException">The hash does not match.</exception>
    private static async Task VerifyHashAsync(string filePath, string expectedHex, string assetName, CancellationToken cancellationToken)
    {
        using var sha = System.Security.Cryptography.SHA256.Create();
        await using var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, useAsync: true);
        var actual = await sha.ComputeHashAsync(stream, cancellationToken);
        var actualHex = Convert.ToHexString(actual);
        if (!string.Equals(actualHex, expectedHex, StringComparison.OrdinalIgnoreCase))
        {
            throw new System.Security.SecurityException(
                $"Whisper model '{assetName}' failed integrity check. The downloaded file does not match the pinned SHA-256.");
        }
    }

    /// <summary>Delete a downloaded model file.</summary>
    /// <returns>True when a file was deleted; false when none existed.</returns>
    public bool DeleteModel(string modelName)
    {
        var path = GetModelPath(modelName);
        if (!File.Exists(path)) return false;
        File.Delete(path);
        _logger.Info($"Deleted model '{modelName}'");
        return true;
    }

    /// <summary>
    /// Look up a catalog entry by its short name (case-insensitive).
    /// </summary>
    /// <exception cref="ArgumentException">No entry has that name.</exception>
    private static WhisperModelInfo FindModel(string modelName)
    {
        foreach (var m in AvailableModels)
        {
            if (string.Equals(m.Name, modelName, StringComparison.OrdinalIgnoreCase))
                return m;
        }

        // Build the list from the catalog so the message stays correct when
        // models are added or removed.
        var available = string.Join(", ", Array.ConvertAll(AvailableModels, m => m.Name));
        throw new ArgumentException($"Unknown model: '{modelName}'. Available: {available}");
    }
}
|
||||
|
||||
/// <summary>Describes one Whisper model variant that can be downloaded.</summary>
/// <param name="FileName">File name used on disk (e.g. "ggml-base.bin").</param>
/// <param name="Name">Short identifier callers pass in ("tiny" / "base" / "small").</param>
/// <param name="ApproximateSizeBytes">Size hint in bytes for UI and progress
/// reporting; the downloaded content itself is checked against
/// <paramref name="Sha256"/>, not against this number.</param>
/// <param name="DownloadUrl">HTTPS URL the model file is fetched from.</param>
/// <param name="Sha256">Pinned lowercase hex SHA-256 of the model file.
/// MUST be set; a download is refused when it is null or blank. The catalog
/// records the date the hashes were verified — they need re-verification
/// before any public release (see Audio_FollowUps.md §2).</param>
public sealed record WhisperModelInfo(string FileName, string Name, long ApproximateSizeBytes, string DownloadUrl, string? Sha256);
|
||||
154
src/OpenClaw.Shared/Capabilities/AppCapability.cs
Normal file
154
src/OpenClaw.Shared/Capabilities/AppCapability.cs
Normal file
@ -0,0 +1,154 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// Node capability for the "app" category. It routes each <c>app.*</c>
/// command to a delegate supplied by the host application, exposing
/// navigation, status, and configuration through the MCP server for
/// programmatic testing and CLI agents.
/// </summary>
public class AppCapability : NodeCapabilityBase
{
    public override string Category => "app";

    private static readonly string[] _commands =
    [
        "app.navigate",
        "app.status",
        "app.sessions",
        "app.agents",
        "app.nodes",
        "app.config.get",
        "app.settings.get",
        "app.settings.set",
        "app.menu",
        "app.search",
    ];

    public override IReadOnlyList<string> Commands => _commands;

    // Handler delegates — wired up by App.xaml.cs after construction.
    // A command whose delegate is still null answers with an error response.
    public Func<string, Task<object?>>? NavigateHandler;
    public Func<object?>? StatusHandler;
    public Func<string?, Task<object?>>? SessionsHandler;
    public Func<Task<object?>>? AgentsHandler;
    public Func<object?>? NodesHandler;
    public Func<string?, Task<object?>>? ConfigGetHandler;
    public Func<string, object?>? SettingsGetHandler;
    public Func<string, string, object?>? SettingsSetHandler;
    public Func<object?>? MenuHandler;
    public Func<string, object?>? SearchHandler;

    public AppCapability(IOpenClawLogger logger) : base(logger) { }

    /// <summary>
    /// Dispatches <paramref name="request"/> by its command name. Unknown
    /// commands produce an error response.
    /// </summary>
    public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
    {
        switch (request.Command)
        {
            case "app.navigate":
                return await HandleNavigate(request);
            case "app.status":
                return HandleStatus();
            case "app.sessions":
                return await HandleSessions(request);
            case "app.agents":
                return await HandleAgents();
            case "app.nodes":
                return HandleNodes();
            case "app.config.get":
                return await HandleConfigGet(request);
            case "app.settings.get":
                return HandleSettingsGet(request);
            case "app.settings.set":
                return HandleSettingsSet(request);
            case "app.menu":
                return HandleMenu();
            case "app.search":
                return HandleSearch(request);
            default:
                return Error($"Unknown command: {request.Command}");
        }
    }

    // app.navigate — requires a non-empty "page" argument.
    private async Task<NodeInvokeResponse> HandleNavigate(NodeInvokeRequest request)
    {
        var page = GetStringArg(request.Args, "page");
        if (string.IsNullOrEmpty(page))
        {
            return Error("Missing required arg: page");
        }

        if (NavigateHandler == null)
        {
            return Error("Navigate handler not registered");
        }

        return Success(await NavigateHandler(page));
    }

    // app.status — no arguments.
    private NodeInvokeResponse HandleStatus()
    {
        return StatusHandler == null
            ? Error("Status handler not registered")
            : Success(StatusHandler());
    }

    // app.sessions — "agentId" is optional and passed through as-is.
    private async Task<NodeInvokeResponse> HandleSessions(NodeInvokeRequest request)
    {
        var agentId = GetStringArg(request.Args, "agentId");
        if (SessionsHandler == null)
        {
            return Error("Sessions handler not registered");
        }

        return Success(await SessionsHandler(agentId));
    }

    // app.agents — no arguments.
    private async Task<NodeInvokeResponse> HandleAgents()
    {
        if (AgentsHandler == null)
        {
            return Error("Agents handler not registered");
        }

        return Success(await AgentsHandler());
    }

    // app.nodes — no arguments.
    private NodeInvokeResponse HandleNodes()
    {
        return NodesHandler == null
            ? Error("Nodes handler not registered")
            : Success(NodesHandler());
    }

    // app.config.get — "path" is optional and passed through as-is.
    private async Task<NodeInvokeResponse> HandleConfigGet(NodeInvokeRequest request)
    {
        var path = GetStringArg(request.Args, "path");
        if (ConfigGetHandler == null)
        {
            return Error("Config handler not registered");
        }

        return Success(await ConfigGetHandler(path));
    }

    // app.settings.get — requires a non-empty "name" argument.
    private NodeInvokeResponse HandleSettingsGet(NodeInvokeRequest request)
    {
        var name = GetStringArg(request.Args, "name");
        if (string.IsNullOrEmpty(name))
        {
            return Error("Missing required arg: name");
        }

        return SettingsGetHandler == null
            ? Error("Settings handler not registered")
            : Success(SettingsGetHandler(name));
    }

    // app.settings.set — requires a non-empty "name" and a non-null "value"
    // (an empty string value is accepted).
    private NodeInvokeResponse HandleSettingsSet(NodeInvokeRequest request)
    {
        var name = GetStringArg(request.Args, "name");
        var value = GetStringArg(request.Args, "value");

        if (string.IsNullOrEmpty(name))
        {
            return Error("Missing required arg: name");
        }

        if (value == null)
        {
            return Error("Missing required arg: value");
        }

        return SettingsSetHandler == null
            ? Error("Settings handler not registered")
            : Success(SettingsSetHandler(name, value));
    }

    // app.menu — no arguments.
    private NodeInvokeResponse HandleMenu()
    {
        return MenuHandler == null
            ? Error("Menu handler not registered")
            : Success(MenuHandler());
    }

    // app.search — requires a non-empty "query" argument.
    private NodeInvokeResponse HandleSearch(NodeInvokeRequest request)
    {
        var query = GetStringArg(request.Args, "query");
        if (string.IsNullOrEmpty(query))
        {
            return Error("Missing required arg: query");
        }

        return SearchHandler == null
            ? Error("Search handler not registered")
            : Success(SearchHandler(query));
    }
}
|
||||
295
src/OpenClaw.Shared/Capabilities/BrowserProxyCapability.cs
Normal file
295
src/OpenClaw.Shared/Capabilities/BrowserProxyCapability.cs
Normal file
@ -0,0 +1,295 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
public class BrowserProxyCapability : NodeCapabilityBase
|
||||
{
|
||||
private const int DefaultTimeoutMs = 20_000;
|
||||
private const int MaxTimeoutMs = 120_000;
|
||||
private const long MaxFileBytes = 10 * 1024 * 1024;
|
||||
private static readonly string[] s_commands = ["browser.proxy"];
|
||||
private readonly string _gatewayUrl;
|
||||
private readonly string _bearerToken;
|
||||
private readonly int? _sshRemoteGatewayPort;
|
||||
private readonly HttpClient _httpClient;
|
||||
|
||||
/// <summary>
/// Creates the browser proxy capability.
/// </summary>
/// <param name="logger">Logger handed to the base capability.</param>
/// <param name="gatewayUrl">Gateway URL; its port is used to derive the control port.</param>
/// <param name="bearerToken">Token used for authentication; null is stored as an empty string.</param>
/// <param name="handler">Optional message handler for the internal HttpClient.</param>
/// <param name="sshRemoteGatewayPort">Optional remote gateway port, used only in guidance messages.</param>
public BrowserProxyCapability(
    IOpenClawLogger logger,
    string gatewayUrl,
    string? bearerToken,
    HttpMessageHandler? handler = null,
    int? sshRemoteGatewayPort = null)
    : base(logger)
{
    _httpClient = handler is null ? new HttpClient() : new HttpClient(handler);
    _sshRemoteGatewayPort = sshRemoteGatewayPort;
    _bearerToken = bearerToken ?? string.Empty;
    _gatewayUrl = gatewayUrl;
}
|
||||
|
||||
/// <summary>Category name of this capability.</summary>
public override string Category
{
    get { return "browser"; }
}

/// <summary>Commands handled by this capability.</summary>
public override IReadOnlyList<string> Commands
{
    get { return s_commands; }
}
|
||||
|
||||
/// <summary>
/// Handles <c>browser.proxy</c>: forwards the request to the browser control
/// host on 127.0.0.1 at gateway port + 2 and wraps the reply.
/// </summary>
/// <remarks>
/// Arguments read from <c>request.Args</c>: <c>method</c> (GET/POST/DELETE,
/// anything else falls back to GET), <c>path</c> (required), <c>timeoutMs</c>
/// (clamped to 1..<see cref="MaxTimeoutMs"/>), plus whatever
/// <c>BuildUri</c> and <c>CreateHttpRequest</c> consume. When the first
/// attempt is answered with 401 and a token is configured, the request is
/// retried once with password-style authentication. Transport, timeout, JSON
/// and file errors are converted into error responses rather than thrown.
/// </remarks>
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
{
    if (!string.Equals(request.Command, "browser.proxy", StringComparison.OrdinalIgnoreCase))
        return Error($"Unknown command: {request.Command}");

    if (!TryResolveControlEndpoint(_gatewayUrl, out var controlPort, out var endpointError))
        return Error(endpointError);

    var method = GetStringArg(request.Args, "method", "GET")?.ToUpperInvariant() ?? "GET";
    if (method is not ("GET" or "POST" or "DELETE"))
        method = "GET";

    var rawPath = GetStringArg(request.Args, "path", "");
    if (!TryNormalizePath(rawPath, out var path, out var pathError))
        return Error(pathError);

    var timeoutMs = Math.Clamp(GetIntArg(request.Args, "timeoutMs", DefaultTimeoutMs), 1, MaxTimeoutMs);
    using var timeoutCts = new System.Threading.CancellationTokenSource(TimeSpan.FromMilliseconds(timeoutMs));

    var uri = BuildUri(controlPort, path, request.Args);
    try
    {
        using var httpRequest = CreateHttpRequest(method, uri, request.Args, usePasswordAuth: false);
        using var response = await _httpClient.SendAsync(httpRequest, timeoutCts.Token);
        var responseText = await response.Content.ReadAsStringAsync(timeoutCts.Token);

        if (response.StatusCode == HttpStatusCode.Unauthorized &&
            !string.IsNullOrWhiteSpace(_bearerToken))
        {
            // Bearer auth was rejected: retry once presenting the same secret
            // as a password (header + Basic).
            using var passwordRequest = CreateHttpRequest(method, uri, request.Args, usePasswordAuth: true);
            using var passwordResponse = await _httpClient.SendAsync(passwordRequest, timeoutCts.Token);
            var passwordResponseText = await passwordResponse.Content.ReadAsStringAsync(timeoutCts.Token);
            return BuildProxyResponse(passwordResponse, passwordResponseText);
        }

        return BuildProxyResponse(response, responseText);
    }
    catch (OperationCanceledException)
    {
        // The timeout token is the only cancellation source here. Catch the
        // base type: cancellation can surface as a plain
        // OperationCanceledException, not only as TaskCanceledException.
        return Error($"browser proxy timed out for {method} {path} after {timeoutMs}ms. {BuildReachabilityGuidance(controlPort, _sshRemoteGatewayPort)}");
    }
    catch (HttpRequestException ex)
    {
        return Error($"Browser control host is not reachable on 127.0.0.1:{controlPort}: {ex.Message}. {BuildReachabilityGuidance(controlPort, _sshRemoteGatewayPort)}");
    }
    catch (JsonException ex)
    {
        return Error($"Browser control host returned invalid JSON: {ex.Message}");
    }
    catch (IOException ex)
    {
        return Error($"Browser proxy file read failed: {ex.Message}");
    }
    catch (UnauthorizedAccessException ex)
    {
        return Error($"Browser proxy file read denied: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Builds the outgoing request for the control host.
/// </summary>
/// <remarks>
/// Authentication is attached only when a token is configured: Bearer by
/// default, or an <c>x-openclaw-password</c> header plus Basic (empty user
/// name) when <paramref name="usePasswordAuth"/> is true. For POST and DELETE
/// the raw JSON of the <c>body</c> argument, when present, becomes the
/// request content.
/// </remarks>
private HttpRequestMessage CreateHttpRequest(string method, Uri uri, JsonElement args, bool usePasswordAuth)
{
    var message = new HttpRequestMessage(new HttpMethod(method), uri);

    var hasToken = !string.IsNullOrWhiteSpace(_bearerToken);
    if (hasToken && usePasswordAuth)
    {
        message.Headers.TryAddWithoutValidation("x-openclaw-password", _bearerToken);
        var basicCredential = Convert.ToBase64String(Encoding.UTF8.GetBytes($":{_bearerToken}"));
        message.Headers.Authorization = new AuthenticationHeaderValue("Basic", basicCredential);
    }
    else if (hasToken)
    {
        message.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _bearerToken);
    }

    var methodCarriesBody = method is "POST" or "DELETE";
    if (methodCarriesBody
        && args.ValueKind == JsonValueKind.Object
        && args.TryGetProperty("body", out var body))
    {
        message.Content = new StringContent(body.GetRawText(), Encoding.UTF8, "application/json");
    }

    return message;
}
|
||||
|
||||
/// <summary>
/// Converts the control host's HTTP reply into a node response.
/// </summary>
/// <remarks>
/// 401 yields the authentication guidance text; any other non-success status
/// yields the response body (or a generic "HTTP nnn" message when the body is
/// blank). A successful body is parsed as JSON (blank counts as <c>{}</c>)
/// and returned as <c>result</c>, together with <c>files</c> when
/// <c>TryCollectFiles</c> returns any.
/// </remarks>
private NodeInvokeResponse BuildProxyResponse(HttpResponseMessage response, string responseText)
{
    if (response.StatusCode == HttpStatusCode.Unauthorized)
    {
        return Error(BuildAuthenticationFailureGuidance());
    }

    var bodyIsBlank = string.IsNullOrWhiteSpace(responseText);
    if (!response.IsSuccessStatusCode)
    {
        if (bodyIsBlank)
        {
            return Error($"Browser control host returned HTTP {(int)response.StatusCode}");
        }

        return Error(responseText);
    }

    using var doc = JsonDocument.Parse(bodyIsBlank ? "{}" : responseText);
    // Clone so the element stays valid after the document is disposed.
    var result = doc.RootElement.Clone();
    var files = TryCollectFiles(result);

    if (files.Count == 0)
    {
        return Success(new { result });
    }

    return Success(new { result, files });
}
|
||||
|
||||
/// <summary>
/// Returns the user-facing explanation for a 401 from the control host. The
/// wording depends on whether a token is configured at all.
/// </summary>
private string BuildAuthenticationFailureGuidance()
{
    if (string.IsNullOrWhiteSpace(_bearerToken))
    {
        return "Browser control host rejected the unauthenticated request. Windows has no gateway shared token saved for browser-control auth; enter the matching gateway token in Settings or run the browser-control host with compatible auth.";
    }

    return "Browser control host rejected authentication. Verify the gateway token saved in Settings matches the browser-control host auth token or password.";
}
|
||||
|
||||
/// <summary>
/// Derives the browser control port (gateway port + 2) from the gateway URL.
/// </summary>
/// <param name="gatewayUrl">Absolute gateway URL.</param>
/// <param name="controlPort">Gateway port + 2 once the URL has parsed; 0 otherwise.</param>
/// <param name="error">Reason for failure, or an empty string on success.</param>
/// <returns>True when the derived port is at most 65535.</returns>
private static bool TryResolveControlEndpoint(string gatewayUrl, out int controlPort, out string error)
{
    error = "";
    controlPort = 0;

    if (Uri.TryCreate(gatewayUrl, UriKind.Absolute, out var gatewayUri) && gatewayUri.Port > 0)
    {
        controlPort = gatewayUri.Port + 2;
        if (controlPort <= 65535)
        {
            return true;
        }

        error = "Browser proxy control port is outside the valid TCP port range.";
        return false;
    }

    error = "Browser proxy requires a gateway URL with an explicit local port.";
    return false;
}
|
||||
|
||||
/// <summary>
/// Builds the troubleshooting hint appended to timeout / unreachable errors.
/// The suggested SSH forward names the concrete remote port (remote gateway
/// port + 2) when that port is known and in 1..65533, and a placeholder
/// otherwise.
/// </summary>
private static string BuildReachabilityGuidance(int localControlPort, int? sshRemoteGatewayPort)
{
    string sshForward;
    if (sshRemoteGatewayPort is >= 1 and <= 65533)
    {
        sshForward = $"ssh -N -L {localControlPort}:127.0.0.1:{sshRemoteGatewayPort.Value + 2} <user>@<host>";
    }
    else
    {
        sshForward = $"ssh -N -L {localControlPort}:127.0.0.1:<remote-gateway-port+2> <user>@<host>";
    }

    return $"Start the local OpenClaw browser control host on gateway port + 2 ({localControlPort}). If the gateway is reached through SSH, also forward the browser-control port with: {sshForward}";
}
|
||||
|
||||
/// <summary>
/// Validates and normalizes a browser.proxy control path: trims it, rejects
/// empty values and anything that looks like a URL, and guarantees a leading '/'.
/// </summary>
/// <param name="rawPath">Caller-supplied path; may be null.</param>
/// <param name="path">Normalized path on success; empty otherwise.</param>
/// <param name="error">Empty on success; otherwise an INVALID_REQUEST message.</param>
/// <returns><c>true</c> when <paramref name="path"/> holds a usable control path.</returns>
private static bool TryNormalizePath(string? rawPath, out string path, out string error)
{
    path = "";
    error = "";

    var trimmed = (rawPath ?? "").Trim();
    if (trimmed.Length == 0)
    {
        error = "INVALID_REQUEST: path required";
        return false;
    }

    // Absolute ("scheme://") and protocol-relative ("//host") forms are refused.
    var looksLikeUrl = trimmed.Contains("://", StringComparison.Ordinal)
        || trimmed.StartsWith("//", StringComparison.Ordinal);
    if (looksLikeUrl)
    {
        error = "INVALID_REQUEST: browser.proxy path must be a local control path, not a URL";
        return false;
    }

    path = trimmed[0] == '/' ? trimmed : "/" + trimmed;
    return true;
}
|
||||
|
||||
/// <summary>
/// Builds the loopback (127.0.0.1) HTTP URI for a browser-control request.
/// </summary>
/// <param name="controlPort">Local browser-control port.</param>
/// <param name="path">Control path used as the URI path.</param>
/// <param name="args">
/// Request arguments. When this is a JSON object, the members of its optional
/// <c>query</c> object and its optional non-blank string <c>profile</c> become
/// query-string parameters; any other value kind yields a URI without a query.
/// </param>
private static Uri BuildUri(int controlPort, string path, JsonElement args)
{
    var builder = new UriBuilder("http", "127.0.0.1", controlPort, path);

    if (args.ValueKind == JsonValueKind.Object)
    {
        var pairs = new List<string>();

        if (args.TryGetProperty("query", out var queryElement) && queryElement.ValueKind == JsonValueKind.Object)
        {
            foreach (var prop in queryElement.EnumerateObject())
            {
                string? text;
                switch (prop.Value.ValueKind)
                {
                    case JsonValueKind.Null:
                    case JsonValueKind.Undefined:
                        // Absent values are left out of the query string entirely.
                        continue;
                    case JsonValueKind.String:
                        text = prop.Value.GetString();
                        break;
                    default:
                        // Non-string values are sent using their textual form.
                        text = prop.Value.ToString();
                        break;
                }

                if (text == null)
                {
                    continue;
                }

                pairs.Add(Uri.EscapeDataString(prop.Name) + "=" + Uri.EscapeDataString(text));
            }
        }

        if (args.TryGetProperty("profile", out var profileElement) && profileElement.ValueKind == JsonValueKind.String)
        {
            var profile = profileElement.GetString();
            if (!string.IsNullOrWhiteSpace(profile))
            {
                // "profile" is appended after the caller-supplied query members.
                pairs.Add("profile=" + Uri.EscapeDataString(profile));
            }
        }

        builder.Query = string.Join("&", pairs);
    }

    return builder.Uri;
}
|
||||
|
||||
/// <summary>
/// Looks for file paths in a browser-control result (<c>path</c>, <c>imagePath</c>
/// and <c>download.path</c>) and inlines each existing file as base64.
/// </summary>
/// <param name="result">Parsed response from the browser control host.</param>
/// <returns>
/// One entry per existing file with <c>path</c>, <c>base64</c> and <c>mimeType</c>;
/// empty when no referenced file exists.
/// </returns>
/// <exception cref="IOException">A referenced file is larger than <c>MaxFileBytes</c>.</exception>
private static List<object> TryCollectFiles(JsonElement result)
{
    // Case-insensitive set so the same file referenced twice is read only once.
    var candidates = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    CollectPath(result, "path", candidates);
    CollectPath(result, "imagePath", candidates);

    var hasDownload = result.ValueKind == JsonValueKind.Object
        && result.TryGetProperty("download", out var download)
        && download.ValueKind == JsonValueKind.Object;
    if (hasDownload)
    {
        CollectPath(result.GetProperty("download"), "path", candidates);
    }

    var collected = new List<object>();
    foreach (var path in candidates)
    {
        var info = new FileInfo(path);
        var isRegularFile = info.Exists && (info.Attributes & FileAttributes.Directory) != FileAttributes.Directory;
        if (!isRegularFile)
        {
            continue;
        }

        if (info.Length > MaxFileBytes)
        {
            throw new IOException($"browser proxy file exceeds {MaxFileBytes / (1024 * 1024)}MB: {path}");
        }

        var content = File.ReadAllBytes(path);
        collected.Add(new
        {
            path,
            base64 = Convert.ToBase64String(content),
            mimeType = GuessMimeType(path)
        });
    }

    return collected;
}
|
||||
|
||||
/// <summary>
/// Adds <paramref name="source"/>'s string property <paramref name="propertyName"/>
/// to <paramref name="paths"/> when it exists and is not blank.
/// </summary>
/// <param name="source">Element to inspect; ignored unless it is a JSON object.</param>
/// <param name="propertyName">Name of the property expected to hold a path.</param>
/// <param name="paths">Set that receives the path.</param>
private static void CollectPath(JsonElement source, string propertyName, HashSet<string> paths)
{
    var hasStringProperty = source.ValueKind == JsonValueKind.Object
        && source.TryGetProperty(propertyName, out var value)
        && value.ValueKind == JsonValueKind.String;
    if (!hasStringProperty)
    {
        return;
    }

    var text = source.GetProperty(propertyName).GetString();
    if (string.IsNullOrWhiteSpace(text))
    {
        return;
    }

    paths.Add(text);
}
|
||||
|
||||
/// <summary>
/// Maps a file extension (case-insensitively) to a MIME type.
/// </summary>
/// <param name="path">File path whose extension is inspected.</param>
/// <returns>The MIME type for a recognized extension; <c>null</c> otherwise.</returns>
private static string? GuessMimeType(string path)
{
    var extension = Path.GetExtension(path).ToLowerInvariant();
    switch (extension)
    {
        case ".png":
            return "image/png";
        case ".jpg":
        case ".jpeg":
            return "image/jpeg";
        case ".pdf":
            return "application/pdf";
        case ".txt":
            return "text/plain";
        case ".json":
            return "application/json";
        case ".html":
        case ".htm":
            return "text/html";
        default:
            return null;
    }
}
|
||||
}
|
||||
@ -14,8 +14,8 @@ public class CameraCapability : NodeCapabilityBase
|
||||
private static readonly string[] _commands = new[]
|
||||
{
|
||||
"camera.list",
|
||||
"camera.snap"
|
||||
// Future: "camera.clip" (video)
|
||||
"camera.snap",
|
||||
"camera.clip"
|
||||
};
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
@ -23,10 +23,14 @@ public class CameraCapability : NodeCapabilityBase
|
||||
// Events for platform-specific implementation
|
||||
public event Func<Task<CameraInfo[]>>? ListRequested;
|
||||
public event Func<CameraSnapArgs, Task<CameraSnapResult>>? SnapRequested;
|
||||
public event Func<CameraClipArgs, Task<CameraClipResult>>? ClipRequested;
|
||||
|
||||
/// <summary>Creates the camera capability; <paramref name="logger"/> is forwarded to the base class.</summary>
public CameraCapability(IOpenClawLogger logger) : base(logger) { }
|
||||
|
||||
/// <summary>Limits <paramref name="value"/> to the inclusive range [<paramref name="min"/>, <paramref name="max"/>].</summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
@ -34,6 +38,7 @@ public class CameraCapability : NodeCapabilityBase
|
||||
{
|
||||
"camera.list" => await HandleListAsync(request),
|
||||
"camera.snap" => await HandleSnapAsync(request),
|
||||
"camera.clip" => await HandleClipAsync(request),
|
||||
_ => Error($"Unknown command: {request.Command}")
|
||||
};
|
||||
}
|
||||
@ -55,16 +60,23 @@ public class CameraCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Camera list failed", ex);
|
||||
return Error($"List failed: {ex.Message}");
|
||||
return Error("List failed");
|
||||
}
|
||||
}
|
||||
|
||||
// Boundary clamps — coerce extreme/negative caller values into range up-front.
|
||||
private const int MinCameraDimension = 16;
|
||||
private const int MaxCameraWidth = 4096;
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
private const int MaxClipDurationMs = 60_000;
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleSnapAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var deviceId = GetStringArg(request.Args, "deviceId");
|
||||
var format = GetStringArg(request.Args, "format", "jpeg");
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1280);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1280), MinCameraDimension, MaxCameraWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
|
||||
Logger.Info($"camera.snap: deviceId={deviceId ?? "(default)"}, format={format}");
|
||||
|
||||
@ -94,7 +106,48 @@ public class CameraCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Camera snap failed", ex);
|
||||
return Error($"Snap failed: {ex.Message}");
|
||||
return Error("Snap failed");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles <c>camera.clip</c>: reads the clip arguments, forwards them to the
/// <c>ClipRequested</c> subscriber and returns the recorded clip.
/// </summary>
/// <param name="request">Invoke request carrying deviceId, durationMs, includeAudio and format.</param>
/// <returns>
/// Success with format/base64/durationMs/hasAudio, or an error when no subscriber
/// is attached or the subscriber throws.
/// </returns>
private async Task<NodeInvokeResponse> HandleClipAsync(NodeInvokeRequest request)
{
    var args = request.Args;
    var deviceId = GetStringArg(args, "deviceId");
    // Duration is forced into [100 ms, MaxClipDurationMs] so zero, negative and
    // oversized values never reach the subscriber.
    var durationMs = Clamp(GetIntArg(args, "durationMs", 3000), 100, MaxClipDurationMs);
    var includeAudio = GetBoolArg(args, "includeAudio", true);
    var format = GetStringArg(args, "format", "mp4") ?? "mp4";

    Logger.Info($"camera.clip: deviceId={deviceId ?? "(default)"}, durationMs={durationMs}, includeAudio={includeAudio}, format={format}");

    if (ClipRequested == null)
        return Error("Camera clip not available");

    try
    {
        var clipArgs = new CameraClipArgs
        {
            DeviceId = deviceId,
            DurationMs = durationMs,
            IncludeAudio = includeAudio,
            Format = format
        };
        var clip = await ClipRequested(clipArgs);

        var payload = new
        {
            format = clip.Format,
            base64 = clip.Base64,
            durationMs = clip.DurationMs,
            hasAudio = clip.HasAudio
        };
        return Success(payload);
    }
    catch (Exception ex)
    {
        // The exception goes to the log only; the caller gets a fixed message.
        Logger.Error("Camera clip failed", ex);
        return Error("Clip failed");
    }
}
|
||||
}
|
||||
@ -121,3 +174,19 @@ public class CameraSnapResult
|
||||
public int Height { get; set; }
|
||||
public string Base64 { get; set; } = "";
|
||||
}
|
||||
|
||||
/// <summary>Arguments passed to the <c>camera.clip</c> subscriber.</summary>
public class CameraClipArgs
{
    /// <summary>Camera to record from; <c>null</c> when the caller named no device.</summary>
    public string? DeviceId { get; set; }

    /// <summary>Requested clip length in milliseconds (defaults to 3000).</summary>
    public int DurationMs { get; set; } = 3000;

    /// <summary>Whether audio should be recorded (defaults to true).</summary>
    public bool IncludeAudio { get; set; } = true;

    /// <summary>Requested container format (defaults to "mp4").</summary>
    public string Format { get; set; } = "mp4";
}
|
||||
|
||||
/// <summary>Result returned by the <c>camera.clip</c> subscriber.</summary>
public class CameraClipResult
{
    /// <summary>Container format of the clip (defaults to "mp4").</summary>
    public string Format { get; set; } = "mp4";

    /// <summary>Clip content as base64 text (defaults to empty).</summary>
    public string Base64 { get; set; } = "";

    /// <summary>Clip duration in milliseconds as reported by the subscriber.</summary>
    public int DurationMs { get; set; }

    /// <summary>Whether the clip contains audio.</summary>
    public bool HasAudio { get; set; }
}
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Win32.SafeHandles;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
@ -20,7 +23,10 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
"canvas.eval",
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.reset"
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
"canvas.a2ui.dump",
|
||||
"canvas.caps",
|
||||
};
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
@ -28,15 +34,75 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
// Events for UI to handle
|
||||
public event EventHandler<CanvasPresentArgs>? PresentRequested;
|
||||
public event EventHandler? HideRequested;
|
||||
public event EventHandler<string>? NavigateRequested;
|
||||
public event Func<string, Task<string>>? EvalRequested;
|
||||
public event Func<CanvasSnapshotArgs, Task<string>>? SnapshotRequested;
|
||||
// Backing slot for NavigateRequested; holds at most one handler.
private Func<string, Task<string>>? _navigateRequested;

/// <summary>
/// Subscriber decides how to handle a navigate request and returns the
/// opener that actually serviced it: <c>"canvas"</c> if an in-process
/// WebView2 frame navigated, <c>"browser"</c> if the URL was handed to the
/// OS default browser. Throwing surfaces as an error to the gateway.
/// Single-subscriber: same multi-handler hazard as the other Func events.
/// </summary>
/// <exception cref="InvalidOperationException">A second, different handler is attached.</exception>
public event Func<string, Task<string>> NavigateRequested
{
    add => SetSingleHandler(ref _navigateRequested, value, nameof(NavigateRequested));
    remove => ClearSingleHandler(ref _navigateRequested, value);
}
|
||||
// Func-typed events only make sense with a single handler: invoking a combined
// (multicast) Func yields just the last subscriber's return value and silently
// drops the others. They are therefore exposed as single-subscriber events
// that throw on a second subscribe, so the mistake is loud.
private Func<string, Task<string>>? _evalRequested;

/// <summary>Single-subscriber handler for script evaluation requests.</summary>
public event Func<string, Task<string>> EvalRequested
{
    add
    {
        SetSingleHandler(ref _evalRequested, value, nameof(EvalRequested));
    }
    remove
    {
        ClearSingleHandler(ref _evalRequested, value);
    }
}
|
||||
// Backing slot for SnapshotRequested; holds at most one handler.
private Func<CanvasSnapshotArgs, Task<string>>? _snapshotRequested;

/// <summary>Single-subscriber handler for canvas snapshot requests.</summary>
public event Func<CanvasSnapshotArgs, Task<string>> SnapshotRequested
{
    add
    {
        SetSingleHandler(ref _snapshotRequested, value, nameof(SnapshotRequested));
    }
    remove
    {
        ClearSingleHandler(ref _snapshotRequested, value);
    }
}
|
||||
public event EventHandler<CanvasA2UIArgs>? A2UIPushRequested;
|
||||
public event EventHandler? A2UIResetRequested;
|
||||
// Backing slot for A2UIDumpRequested; holds at most one handler.
private Func<Task<string>>? _a2uiDumpRequested;

/// <summary>Returns a JSON state dump of the native A2UI surface graph.</summary>
/// <exception cref="InvalidOperationException">A second, different handler is attached.</exception>
public event Func<Task<string>> A2UIDumpRequested
{
    add => SetSingleHandler(ref _a2uiDumpRequested, value, nameof(A2UIDumpRequested));
    remove => ClearSingleHandler(ref _a2uiDumpRequested, value);
}
|
||||
// Backing slot for CapsRequested; holds at most one handler.
private Func<Task<string>>? _capsRequested;

/// <summary>Returns a JSON capability summary describing which canvas operations are supported.</summary>
/// <exception cref="InvalidOperationException">A second, different handler is attached.</exception>
public event Func<Task<string>> CapsRequested
{
    add => SetSingleHandler(ref _capsRequested, value, nameof(CapsRequested));
    remove => ClearSingleHandler(ref _capsRequested, value);
}
|
||||
|
||||
/// <summary>
/// Stores <paramref name="value"/> as the only handler of a single-subscriber event.
/// </summary>
/// <param name="slot">Backing field of the event.</param>
/// <param name="value">Handler being attached.</param>
/// <param name="name">Event name used in the exception message.</param>
/// <exception cref="InvalidOperationException">
/// A different handler is already attached.
/// </exception>
private static void SetSingleHandler<T>(ref T? slot, T value, string name) where T : Delegate
{
    // Compare with delegate equality (same target + method), not reference
    // equality: `evt += obj.Method` allocates a new delegate object every time,
    // so re-attaching the same method must count as "the same handler".
    if (slot != null && !slot.Equals(value))
        throw new InvalidOperationException($"{name} accepts only one subscriber. Detach the previous handler first.");
    slot = value;
}
|
||||
/// <summary>
/// Detaches <paramref name="value"/> from a single-subscriber event if it is the
/// handler currently stored in <paramref name="slot"/>.
/// </summary>
/// <param name="slot">Backing field of the event.</param>
/// <param name="value">Handler being detached.</param>
private static void ClearSingleHandler<T>(ref T? slot, T value) where T : Delegate
{
    // Delegate equality (same target + method) is required here: `evt -= obj.Method`
    // passes a freshly allocated delegate, which is never reference-equal to the
    // stored one, so a reference check would leave the slot occupied forever.
    if (slot != null && slot.Equals(value)) slot = null;
}
|
||||
|
||||
/// <summary>Creates the canvas capability; <paramref name="logger"/> is forwarded to the base class.</summary>
public CanvasCapability(IOpenClawLogger logger) : base(logger) { }
|
||||
|
||||
/// <summary>Limits <paramref name="value"/> to the inclusive range [<paramref name="min"/>, <paramref name="max"/>].</summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
        return min;
    return value > max ? max : value;
}
|
||||
|
||||
/// <summary>
/// Limits a window coordinate to [MinPosition, MaxPosition], passing the
/// "center" sentinel -1 through unchanged.
/// </summary>
private static int ClampPosition(int value) => value switch
{
    -1 => -1, // documented "center" sentinel
    < MinPosition => MinPosition,
    > MaxPosition => MaxPosition,
    _ => value
};
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
@ -48,19 +114,39 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
"canvas.eval" => await HandleEvalAsync(request),
|
||||
"canvas.snapshot" => await HandleSnapshotAsync(request),
|
||||
"canvas.a2ui.push" => HandleA2UIPush(request),
|
||||
"canvas.a2ui.pushJSONL" => HandleA2UIPush(request),
|
||||
"canvas.a2ui.reset" => HandleA2UIReset(request),
|
||||
"canvas.a2ui.dump" => await HandleA2UIDumpAsync(),
|
||||
"canvas.caps" => await HandleCapsAsync(),
|
||||
_ => Error($"Unknown command: {request.Command}")
|
||||
};
|
||||
}
|
||||
|
||||
// Window-bounds clamps. -1 is the documented "center" sentinel for x/y, so
// ClampPosition passes -1 through unchanged; every other value is clamped to
// [MinPosition, MaxPosition].
|
||||
private const int MinDimension = 100;
|
||||
private const int MaxDimension = 7680;
|
||||
private const int MinPosition = -16384;
|
||||
private const int MaxPosition = 16384;
|
||||
private const int MinSnapshotWidth = 32;
|
||||
private const int MaxSnapshotWidth = 7680;
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
|
||||
// A2UI push caps. Inline transport in McpHttpServer caps at 4 MiB; jsonlPath
|
||||
// bypasses that, so re-enforce here. The line-count cap protects the UI thread
|
||||
// from a single push that explodes into thousands of dispatcher posts.
|
||||
internal const long MaxA2UIJsonlBytes = 4L * 1024 * 1024;
|
||||
internal const int MaxA2UIJsonlLines = 4096;
|
||||
|
||||
private Task<NodeInvokeResponse> HandlePresentAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var url = GetStringArg(request.Args, "url");
|
||||
var html = GetStringArg(request.Args, "html");
|
||||
var width = GetIntArg(request.Args, "width", 800);
|
||||
var height = GetIntArg(request.Args, "height", 600);
|
||||
var x = GetIntArg(request.Args, "x", -1); // -1 = center
|
||||
var y = GetIntArg(request.Args, "y", -1);
|
||||
var width = Clamp(GetIntArg(request.Args, "width", 800), MinDimension, MaxDimension);
|
||||
var height = Clamp(GetIntArg(request.Args, "height", 600), MinDimension, MaxDimension);
|
||||
var x = ClampPosition(GetIntArg(request.Args, "x", -1)); // -1 = center
|
||||
var y = ClampPosition(GetIntArg(request.Args, "y", -1));
|
||||
var title = GetStringArg(request.Args, "title", "Canvas");
|
||||
var alwaysOnTop = GetBoolArg(request.Args, "alwaysOnTop", false);
|
||||
|
||||
@ -90,16 +176,49 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleNavigateAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var url = GetStringArg(request.Args, "url");
|
||||
if (string.IsNullOrEmpty(url))
|
||||
var rawUrl = GetStringArg(request.Args, "url");
|
||||
if (string.IsNullOrEmpty(rawUrl))
|
||||
{
|
||||
return Error("Missing url parameter");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.navigate: {url}");
|
||||
NavigateRequested?.Invoke(this, url);
|
||||
|
||||
return Success(new { navigated = true });
|
||||
|
||||
// Validate up front so the OS-level Process.Start in the subscriber
|
||||
// can't be tricked into shell-executing javascript:/file:/app-protocol
|
||||
// URIs. The subscriber re-validates as defense-in-depth.
|
||||
if (!HttpUrlValidator.TryParse(rawUrl, out var canonical, out var validationError))
|
||||
{
|
||||
// Avoid leaking the raw URL — agents sometimes hand us tokenized
|
||||
// OAuth/reset URLs that fail validation, and our log files have
|
||||
// an effectively-unbounded retention policy. Sanitize to scheme +
|
||||
// host + first path segment.
|
||||
Logger.Warn($"canvas.navigate rejected: {validationError} (sanitized: {UrlLogSanitizer.Sanitize(rawUrl)})");
|
||||
return Error($"Invalid url: {validationError}");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.navigate: {UrlLogSanitizer.Sanitize(canonical)}");
|
||||
|
||||
var handler = _navigateRequested;
|
||||
if (handler == null)
|
||||
{
|
||||
// No subscriber means there's no surface to navigate and no opener
|
||||
// to fall back to. Tell the agent honestly so it can pick another
|
||||
// tool instead of believing it succeeded.
|
||||
return Error("CANVAS_NOT_AVAILABLE: no navigate handler registered");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var opener = await handler(canonical!);
|
||||
// opener is the subscriber's word for how it serviced the request:
|
||||
// "canvas" (existing WebView2 frame), "browser" (default browser),
|
||||
// or anything else the subscriber wants to surface back to the agent.
|
||||
return Success(new { navigated = true, opener, url = canonical });
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"canvas.navigate handler failed: {ex.Message}", ex);
|
||||
return Error($"Navigate failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleEvalAsync(NodeInvokeRequest request)
|
||||
@ -112,16 +231,17 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
return Error("Missing script parameter");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.eval: {script.Substring(0, Math.Min(50, script.Length))}...");
|
||||
Logger.Info($"canvas.eval: {script[..Math.Min(50, script.Length)]}...");
|
||||
|
||||
if (EvalRequested == null)
|
||||
var evalHandler = _evalRequested;
|
||||
if (evalHandler == null)
|
||||
{
|
||||
return Error("Canvas not available");
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
var result = await EvalRequested(script);
|
||||
var result = await evalHandler(script);
|
||||
return Success(new { result });
|
||||
}
|
||||
catch (Exception ex)
|
||||
@ -133,19 +253,20 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
private async Task<NodeInvokeResponse> HandleSnapshotAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var format = GetStringArg(request.Args, "format", "png");
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1200);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1200), MinSnapshotWidth, MaxSnapshotWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
|
||||
Logger.Info($"canvas.snapshot: format={format}, maxWidth={maxWidth}");
|
||||
|
||||
if (SnapshotRequested == null)
|
||||
var snapshotHandler = _snapshotRequested;
|
||||
if (snapshotHandler == null)
|
||||
{
|
||||
return Error("Canvas not available");
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
var base64 = await SnapshotRequested(new CanvasSnapshotArgs
|
||||
var base64 = await snapshotHandler(new CanvasSnapshotArgs
|
||||
{
|
||||
Format = format ?? "png",
|
||||
MaxWidth = maxWidth,
|
||||
@ -165,53 +286,193 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
var jsonl = GetStringArg(request.Args, "jsonl");
|
||||
var jsonlPath = GetStringArg(request.Args, "jsonlPath");
|
||||
var props = request.Args.TryGetProperty("props", out var propsEl) ? propsEl : default;
|
||||
|
||||
|
||||
if (string.IsNullOrWhiteSpace(jsonl) && !string.IsNullOrWhiteSpace(jsonlPath))
|
||||
{
|
||||
// Validate jsonlPath to prevent arbitrary file reads.
|
||||
// Resolve to absolute path and reject traversal or suspicious paths.
|
||||
try
|
||||
{
|
||||
var fullPath = Path.GetFullPath(jsonlPath);
|
||||
var tempRoot = Path.GetFullPath(Path.GetTempPath());
|
||||
if (!fullPath.StartsWith(tempRoot, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
Logger.Warn($"canvas.a2ui.push: jsonlPath outside temp directory: {fullPath}");
|
||||
return Error("jsonlPath must be within the system temp directory");
|
||||
}
|
||||
jsonl = ReadValidatedJsonlPath(jsonlPath, request.Command);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
return Error($"Invalid jsonlPath: {ex.Message}");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
jsonl = File.ReadAllText(jsonlPath);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"canvas.a2ui.push: failed to read jsonlPath ({jsonlPath})", ex);
|
||||
Logger.Error($"{request.Command}: failed to read jsonlPath", ex);
|
||||
return Error($"Failed to read jsonlPath: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (string.IsNullOrWhiteSpace(jsonl))
|
||||
{
|
||||
return Error("Missing jsonl or jsonlPath parameter");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.a2ui.push: {jsonl.Length} chars");
|
||||
|
||||
|
||||
// Inline-jsonl size cap. Encoding.UTF8.GetByteCount streams over chars
|
||||
// without allocating, so this is cheap.
|
||||
long byteCount = System.Text.Encoding.UTF8.GetByteCount(jsonl);
|
||||
if (byteCount > MaxA2UIJsonlBytes)
|
||||
{
|
||||
Logger.Warn($"{request.Command}: jsonl payload too large ({byteCount} > {MaxA2UIJsonlBytes})");
|
||||
return Error($"jsonl exceeds maximum size of {MaxA2UIJsonlBytes} bytes");
|
||||
}
|
||||
|
||||
// Line-count cap. A push that fans out to thousands of UI-thread
|
||||
// dispatches has DoS potential even if individually small.
|
||||
int lineCount = CountLines(jsonl);
|
||||
if (lineCount > MaxA2UIJsonlLines)
|
||||
{
|
||||
Logger.Warn($"{request.Command}: jsonl line count too high ({lineCount} > {MaxA2UIJsonlLines})");
|
||||
return Error($"jsonl exceeds maximum of {MaxA2UIJsonlLines} lines");
|
||||
}
|
||||
|
||||
Logger.Info($"{request.Command}: {byteCount} bytes, {lineCount} lines");
|
||||
|
||||
A2UIPushRequested?.Invoke(this, new CanvasA2UIArgs
|
||||
{
|
||||
Jsonl = jsonl,
|
||||
JsonlPath = jsonlPath,
|
||||
Props = props.ValueKind != default ? props.GetRawText() : "{}"
|
||||
});
|
||||
|
||||
|
||||
return Success(new { pushed = true });
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the lines of <paramref name="s"/> that contain at least one
/// non-whitespace character. Both '\n' and '\r' end a line; blank and
/// whitespace-only lines are not counted. No intermediate array is allocated.
/// </summary>
private static int CountLines(string s)
{
    var total = 0;
    var sawContent = false;

    foreach (var ch in s)
    {
        if (ch is '\n' or '\r')
        {
            // Line break: count the line just finished if it had content.
            if (sawContent)
            {
                total++;
            }

            sawContent = false;
        }
        else if (!sawContent && !char.IsWhiteSpace(ch))
        {
            sawContent = true;
        }
    }

    // A final line without a trailing newline still counts.
    return sawContent ? total + 1 : total;
}
|
||||
|
||||
/// <summary>
/// Reads a JSONL file after checking that it lives inside the system temp
/// directory. Containment is checked on the resolved path, on the final target
/// of a reparse point, and (on Windows) on the path of the opened handle; the
/// file must also not exceed <c>MaxA2UIJsonlBytes</c>.
/// </summary>
/// <param name="jsonlPath">Caller-supplied path to the JSONL file.</param>
/// <param name="command">Command name used as the prefix of warning log lines.</param>
/// <returns>The file content decoded as UTF-8 (BOM-aware).</returns>
/// <exception cref="InvalidOperationException">
/// The path is malformed, escapes the temp directory, contains an unresolvable
/// reparse point, or the file is too large.
/// </exception>
private string ReadValidatedJsonlPath(string jsonlPath, string command)
{
    string fullPath;
    string tempRoot;
    try
    {
        fullPath = Path.GetFullPath(jsonlPath);
        tempRoot = EnsureTrailingSeparator(Path.GetFullPath(Path.GetTempPath()));
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException($"Invalid jsonlPath: {ex.Message}", ex);
    }

    if (!IsPathWithinRoot(fullPath, tempRoot))
    {
        throw Rejected(
            $"jsonlPath outside temp directory: {fullPath}",
            "jsonlPath must be within the system temp directory");
    }

    // A link/junction inside temp could point elsewhere, so check its final target too.
    var fileInfo = new FileInfo(fullPath);
    if (fileInfo.Exists && (fileInfo.Attributes & FileAttributes.ReparsePoint) == FileAttributes.ReparsePoint)
    {
        FileSystemInfo? linkTarget;
        try
        {
            linkTarget = fileInfo.ResolveLinkTarget(returnFinalTarget: true);
        }
        catch (Exception ex)
        {
            throw Rejected(
                $"jsonlPath reparse point could not be resolved: {ex.Message}",
                "jsonlPath contains an unresolvable reparse point",
                ex);
        }

        if (linkTarget == null)
        {
            throw Rejected(
                "jsonlPath reparse point could not be resolved",
                "jsonlPath contains an unresolvable reparse point");
        }

        if (!IsPathWithinRoot(linkTarget.FullName, tempRoot))
        {
            throw Rejected(
                $"jsonlPath reparse point resolves outside temp directory: {linkTarget.FullName}",
                "jsonlPath reparse point must resolve within the system temp directory");
        }
    }

    using var stream = new FileStream(fullPath, FileMode.Open, FileAccess.Read, FileShare.Read);

    // GetFinalPathFromHandle returns "" on non-Windows; in that case the handle
    // containment check is skipped and the link resolution above is the guard.
    var handlePath = GetFinalPathFromHandle(stream.SafeFileHandle);
    if (!string.IsNullOrEmpty(handlePath) && !IsPathWithinRoot(handlePath, tempRoot))
    {
        throw Rejected(
            $"jsonlPath file handle resolves outside temp directory: {handlePath}",
            "jsonlPath must resolve within the system temp directory");
    }

    if (stream.Length > MaxA2UIJsonlBytes)
    {
        throw Rejected(
            $"jsonlPath file too large ({stream.Length} > {MaxA2UIJsonlBytes})",
            $"jsonlPath exceeds maximum size of {MaxA2UIJsonlBytes} bytes");
    }

    using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);
    return reader.ReadToEnd();

    // Logs the rejection as a warning prefixed with the command name, then
    // returns the exception for the caller to throw.
    InvalidOperationException Rejected(string logDetail, string message, Exception? inner = null)
    {
        Logger.Warn($"{command}: {logDetail}");
        return new InvalidOperationException(message, inner);
    }
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="path"/> lies underneath <paramref name="root"/>.
/// Both are normalized first, and the root gets a trailing separator so that a
/// sibling such as "temp2" does not match the root "temp". Comparison ignores case.
/// </summary>
private static bool IsPathWithinRoot(string path, string root)
{
    var candidate = Path.GetFullPath(NormalizeFinalPath(path));
    var container = Path.GetFullPath(NormalizeFinalPath(root));
    container = EnsureTrailingSeparator(container);
    return candidate.StartsWith(container, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns <paramref name="path"/> unchanged when it already ends with a
/// directory separator (primary or alternate); otherwise appends the primary one.
/// </summary>
private static string EnsureTrailingSeparator(string path)
{
    var alreadyTerminated = path.EndsWith(Path.DirectorySeparatorChar)
        || path.EndsWith(Path.AltDirectorySeparatorChar);
    return alreadyTerminated ? path : path + Path.DirectorySeparatorChar;
}
|
||||
|
||||
/// <summary>
/// Asks Windows for the final path of an open file handle via
/// <c>GetFinalPathNameByHandle</c> and strips the extended-length prefix.
/// </summary>
/// <param name="handle">Handle of the already-opened file.</param>
/// <returns>The normalized final path, or an empty string on non-Windows platforms.</returns>
/// <exception cref="IOException">The Win32 call failed or reported a path longer than 32768 characters.</exception>
private static string GetFinalPathFromHandle(SafeFileHandle handle)
{
    if (!OperatingSystem.IsWindows())
    {
        return string.Empty;
    }

    const int MaxCapacity = 32768;
    for (var capacity = 512; capacity <= MaxCapacity;)
    {
        var buffer = new StringBuilder(capacity);
        var reported = GetFinalPathNameByHandle(handle, buffer, (uint)buffer.Capacity, 0);

        if (reported == 0)
        {
            throw new IOException($"GetFinalPathNameByHandle failed with Win32 error {Marshal.GetLastWin32Error()}");
        }

        if (reported < buffer.Capacity)
        {
            return NormalizeFinalPath(buffer.ToString());
        }

        // Buffer too small: retry with the size the API asked for.
        capacity = (int)reported + 1;
    }

    throw new IOException("GetFinalPathNameByHandle returned an unexpectedly long path");
}
|
||||
|
||||
/// <summary>
/// Removes the Windows extended-length prefix from a path:
/// <c>\\?\UNC\server\share</c> becomes <c>\\server\share</c> and <c>\\?\C:\x</c>
/// becomes <c>C:\x</c>. Paths without the prefix are returned unchanged.
/// </summary>
private static string NormalizeFinalPath(string path)
{
    const string uncMarker = @"\\?\UNC\";
    const string localMarker = @"\\?\";

    // The UNC form must be tested first because it also starts with the local marker.
    if (path.StartsWith(uncMarker, StringComparison.OrdinalIgnoreCase))
    {
        return @"\\" + path[uncMarker.Length..];
    }

    return path.StartsWith(localMarker, StringComparison.OrdinalIgnoreCase)
        ? path[localMarker.Length..]
        : path;
}
|
||||
|
||||
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
|
||||
private static extern uint GetFinalPathNameByHandle(
|
||||
SafeFileHandle hFile,
|
||||
StringBuilder lpszFilePath,
|
||||
uint cchFilePath,
|
||||
uint dwFlags);
|
||||
|
||||
private NodeInvokeResponse HandleA2UIReset(NodeInvokeRequest request)
|
||||
{
|
||||
@ -219,6 +480,52 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
A2UIResetRequested?.Invoke(this, EventArgs.Empty);
|
||||
return Success(new { reset = true });
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles <c>canvas.a2ui.dump</c>: asks the dump subscriber for its JSON state
/// and returns it as structured JSON rather than as a quoted string.
/// </summary>
/// <returns>
/// Success with the parsed JSON payload; <c>CANVAS_NOT_OPEN</c> when no subscriber
/// is attached; <c>CANVAS_DUMP_FAILED</c> when the subscriber throws or returns
/// text that is not valid JSON.
/// </returns>
private async Task<NodeInvokeResponse> HandleA2UIDumpAsync()
{
    Logger.Info("canvas.a2ui.dump");
    var dumpHandler = _a2uiDumpRequested;
    if (dumpHandler == null)
        return Error("CANVAS_NOT_OPEN: no A2UI canvas is currently active");
    try
    {
        var json = await dumpHandler();
        // Parse once and clone the root so the payload outlives the disposed
        // document; this replaces the earlier parse -> raw text -> re-parse.
        using var doc = System.Text.Json.JsonDocument.Parse(json);
        object payload = doc.RootElement.Clone();
        return Success(payload);
    }
    catch (Exception ex)
    {
        // Keep the stack trace in the local log; the caller only sees the message.
        Logger.Error("canvas.a2ui.dump failed", ex);
        return Error($"CANVAS_DUMP_FAILED: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Handles <c>canvas.caps</c>: returns the subscriber's JSON capability summary,
/// or a fixed "nothing supported" summary when no subscriber is attached.
/// </summary>
/// <returns>
/// Success with the capability payload; <c>CANVAS_CAPS_FAILED</c> when the
/// subscriber throws or returns text that is not valid JSON.
/// </returns>
private async Task<NodeInvokeResponse> HandleCapsAsync()
{
    var capsHandler = _capsRequested;
    if (capsHandler == null)
    {
        // No subscriber: report every operation as unsupported.
        return Success(new
        {
            renderer = "none",
            eval = false,
            snapshot = false,
            navigate = false,
            a2ui = new { version = "0.8", introspect = false },
        });
    }
    try
    {
        var json = await capsHandler();
        // Parse once and clone the root so the payload outlives the disposed
        // document; this replaces the earlier parse -> raw text -> re-parse.
        using var doc = System.Text.Json.JsonDocument.Parse(json);
        object payload = doc.RootElement.Clone();
        return Success(payload);
    }
    catch (Exception ex)
    {
        // Keep the stack trace in the local log; the caller only sees the message.
        Logger.Error("canvas.caps failed", ex);
        return Error($"CANVAS_CAPS_FAILED: {ex.Message}");
    }
}
|
||||
}
|
||||
|
||||
public class CanvasPresentArgs : EventArgs
|
||||
|
||||
277
src/OpenClaw.Shared/Capabilities/DeviceCapability.cs
Normal file
277
src/OpenClaw.Shared/Capabilities/DeviceCapability.cs
Normal file
@ -0,0 +1,277 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net.NetworkInformation;
|
||||
using System.Reflection;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Device metadata and system health/status capability.
|
||||
/// device.info - static device metadata (no provider needed).
|
||||
/// device.status - rich system health data via injected IDeviceStatusProvider.
|
||||
/// </summary>
|
||||
public class DeviceCapability : NodeCapabilityBase
|
||||
{
|
||||
public override string Category => "device";
|
||||
|
||||
private static readonly string[] _commands =
|
||||
[
|
||||
"device.info",
|
||||
"device.status"
|
||||
];
|
||||
|
||||
private static readonly HashSet<string> _validSections = new(
|
||||
["os", "cpu", "memory", "disk", "battery"],
|
||||
StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private readonly IDeviceStatusProvider? _provider;
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
/// <summary>
/// Creates the device capability.
/// </summary>
/// <param name="logger">Logger forwarded to the base class.</param>
/// <param name="provider">Status provider stored for use by <c>device.status</c>.</param>
public DeviceCapability(IOpenClawLogger logger, IDeviceStatusProvider provider)
    : base(logger)
    => _provider = provider;
|
||||
|
||||
/// <summary>
/// Routes an invocation to the handler that matches its command name.
/// </summary>
/// <param name="request">Invocation to execute; its <c>Command</c> selects the handler.</param>
/// <returns>The handler's response, or an error response for an unrecognised command.</returns>
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
{
    switch (request.Command)
    {
        case "device.info":
            return HandleInfo();

        case "device.status":
            return await HandleStatusAsync(request);

        default:
            return Error($"Unknown command: {request.Command}");
    }
}
|
||||
|
||||
/// <summary>
/// Handles <c>device.info</c>: reports machine name, model identifier, OS description,
/// application version/build and the current culture name.
/// </summary>
/// <returns>A success response wrapping the metadata object.</returns>
private NodeInvokeResponse HandleInfo()
{
    Logger.Info("device.info");

    var assembly = typeof(DeviceCapability).Assembly;
    var assemblyVersion = assembly.GetName().Version?.ToString();
    var informationalVersion = assembly
        .GetCustomAttribute<AssemblyInformationalVersionAttribute>()
        ?.InformationalVersion;

    // Preference order: informational version, then assembly version, then a fixed marker.
    var appVersion = informationalVersion ?? assemblyVersion ?? "unknown";
    var osDescription = RuntimeInformation.OSDescription;

    return Success(new
    {
        deviceName = Environment.MachineName,
        modelIdentifier = GetModelIdentifier(),
        systemName = OperatingSystem.IsWindows() ? "Windows" : osDescription,
        systemVersion = osDescription,
        appVersion,
        appBuild = assemblyVersion ?? appVersion,
        locale = CultureInfo.CurrentCulture.Name
    });
}
|
||||
|
||||
/// <summary>
/// Handles <c>device.status</c>: collects the requested sections from the provider and then
/// appends the legacy fields (battery placeholder, thermal, storage, network, uptime).
/// </summary>
/// <param name="request">
/// Invocation whose <c>sections</c> argument names the sections to collect. An empty list
/// collects every section.
/// </param>
/// <returns>
/// An error response when no provider is set or an unknown section is named; otherwise a
/// success response wrapping the result dictionary.
/// </returns>
private async Task<NodeInvokeResponse> HandleStatusAsync(NodeInvokeRequest request)
{
    var provider = _provider;
    if (provider is null)
    {
        return Error("Device status provider not available");
    }

    var sections = GetStringArrayArg(request.Args, "sections");

    // A single unrecognised name fails the whole request.
    var unknown = sections.Where(name => !_validSections.Contains(name)).ToArray();
    if (unknown.Length != 0)
    {
        return Error($"Unknown sections: {string.Join(", ", unknown)}. Valid: {string.Join(", ", _validSections)}");
    }

    var collectAll = sections.Length == 0;
    bool Wants(string name) =>
        collectAll || sections.Contains(name, StringComparer.OrdinalIgnoreCase);

    var result = new Dictionary<string, object?>
    {
        ["collectedAt"] = DateTime.UtcNow.ToString("o")
    };

    if (Wants("os"))
    {
        result["os"] = SafeCollect("os", () => provider.GetOsInfo());
    }

    if (Wants("cpu"))
    {
        result["cpu"] = await SafeCollectAsync("cpu", () => provider.GetCpuInfoAsync());
    }

    if (Wants("memory"))
    {
        result["memory"] = SafeCollect("memory", () => provider.GetMemoryInfo());
    }

    if (Wants("disk"))
    {
        result["disk"] = SafeCollect("disk", () => provider.GetDiskInfo());
    }

    if (Wants("battery"))
    {
        result["battery"] = SafeCollect(
            "battery",
            () => WrapBatteryWithLegacyFields(provider.GetBatteryInfo()));
    }

    // The legacy battery shape { level, state, lowPowerModeEnabled } must always be present.
    // It is substituted when battery was not collected, when the stored value is null, or
    // when collection failed (SafeCollect returned an object exposing an "error" property).
    result.TryGetValue("battery", out var battery);
    var errorProperty = battery?.GetType().GetProperty("error");
    if (battery is null || errorProperty is not null)
    {
        result["battery"] = new
        {
            level = (double?)null,
            state = "unknown",
            lowPowerModeEnabled = false,
            error = errorProperty?.GetValue(battery)?.ToString()
        };
    }

    // Legacy fields kept for existing consumers.
    result["thermal"] = new { state = "nominal" };
    result["storage"] = SafeCollect("storage", () => GetStorageStatus());
    result["network"] = SafeCollect("network", () => GetNetworkStatus());
    result["uptimeSeconds"] = Environment.TickCount64 / 1000.0;

    return Success(result);
}
|
||||
|
||||
/// <summary>
/// Runs one section collector and converts a failure into data, so a single failing section
/// does not fail the whole <c>device.status</c> response.
/// </summary>
/// <param name="section">Section name, used only in the warning log line.</param>
/// <param name="collector">Delegate that produces the section payload.</param>
/// <returns>
/// The collector's result, or an object with a single <c>error</c> property holding the
/// exception message when the collector throws.
/// </returns>
private object? SafeCollect(string section, Func<object> collector)
{
    try
    {
        var payload = collector();
        return payload;
    }
    catch (Exception failure)
    {
        Logger.Warn($"device.status: {section} collection failed: {failure.Message}");
        return new { error = failure.Message };
    }
}
|
||||
|
||||
/// <summary>
/// Asynchronous counterpart of <c>SafeCollect</c>: awaits one section collector and converts
/// a failure into an <c>error</c> object instead of letting it propagate.
/// </summary>
/// <param name="section">Section name, used only in the warning log line.</param>
/// <param name="collector">Delegate that starts the asynchronous collection.</param>
/// <returns>
/// The awaited payload, or an object with a single <c>error</c> property holding the
/// exception message when the collector throws or its task faults.
/// </returns>
private async Task<object?> SafeCollectAsync(string section, Func<Task<object>> collector)
{
    try
    {
        var payload = await collector();
        return payload;
    }
    catch (Exception failure)
    {
        Logger.Warn($"device.status: {section} collection failed: {failure.Message}");
        return new { error = failure.Message };
    }
}
|
||||
|
||||
/// <summary>
/// Combines the provider's battery payload with the legacy fields <c>level</c>,
/// <c>state</c> and <c>lowPowerModeEnabled</c> so consumers reading the old shape keep working.
/// </summary>
/// <param name="providerResult">Battery payload from the provider; it is round-tripped through JSON.</param>
/// <returns>
/// A dictionary holding the legacy fields followed by every provider field. A provider field
/// that shares a name with a legacy field replaces it, because the merge runs last.
/// </returns>
private static object WrapBatteryWithLegacyFields(object providerResult)
{
    // Round-trip through JSON to obtain the provider's properties as a name/value map.
    var providerFields =
        System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, System.Text.Json.JsonElement>>(
            System.Text.Json.JsonSerializer.Serialize(providerResult))
        ?? new Dictionary<string, System.Text.Json.JsonElement>();

    // level mirrors chargePercent, but only when that field is a JSON number.
    double? level =
        providerFields.TryGetValue("chargePercent", out var charge)
        && charge.ValueKind == System.Text.Json.JsonValueKind.Number
            ? charge.GetDouble()
            : null;

    var charging =
        providerFields.TryGetValue("isCharging", out var chargingFlag)
        && chargingFlag.ValueKind == System.Text.Json.JsonValueKind.True;

    string state;
    if (charging)
    {
        state = "charging";
    }
    else if (level.HasValue)
    {
        state = "discharging";
    }
    else
    {
        state = "unknown";
    }

    var merged = new Dictionary<string, object?>
    {
        ["level"] = level,
        ["state"] = state,
        ["lowPowerModeEnabled"] = false,
    };

    foreach (var (fieldName, fieldValue) in providerFields)
    {
        merged[fieldName] = fieldValue;
    }

    return merged;
}
|
||||
|
||||
/// <summary>
/// Returns the value of the <c>PROCESSOR_IDENTIFIER</c> environment variable when it is set
/// to something non-blank, otherwise the lower-cased OS architecture name.
/// </summary>
private static string GetModelIdentifier()
{
    var fromEnvironment = Environment.GetEnvironmentVariable("PROCESSOR_IDENTIFIER");

    return string.IsNullOrWhiteSpace(fromEnvironment)
        ? RuntimeInformation.OSArchitecture.ToString().ToLowerInvariant()
        : fromEnvironment;
}
|
||||
|
||||
#region Legacy helpers (backward compat)
|
||||
|
||||
/// <summary>
/// Reports total, free and used bytes for one drive: the root of the user profile folder,
/// else the root of the application base directory, else the first ready drive.
/// </summary>
/// <returns>
/// An object with <c>totalBytes</c>, <c>freeBytes</c> and <c>usedBytes</c>; all three are
/// zero when no ready drive is found.
/// </returns>
private static object GetStorageStatus()
{
    var profileRoot = Path.GetPathRoot(
        Environment.GetFolderPath(Environment.SpecialFolder.UserProfile));
    var root = profileRoot
        ?? Path.GetPathRoot(AppContext.BaseDirectory)
        ?? string.Empty;

    DriveInfo? drive;
    if (string.IsNullOrWhiteSpace(root))
    {
        drive = DriveInfo.GetDrives().FirstOrDefault(candidate => candidate.IsReady);
    }
    else
    {
        drive = new DriveInfo(root);
    }

    if (drive is null || !drive.IsReady)
    {
        return new { totalBytes = 0L, freeBytes = 0L, usedBytes = 0L };
    }

    long totalBytes = drive.TotalSize;
    long freeBytes = drive.AvailableFreeSpace;

    return new
    {
        totalBytes,
        freeBytes,
        // Never report a negative figure.
        usedBytes = Math.Max(0, totalBytes - freeBytes)
    };
}
|
||||
|
||||
/// <summary>
/// Summarises network state: whether any network is available, plus the distinct kinds
/// (<c>wifi</c>, <c>wired</c>, <c>cellular</c>, <c>other</c>) of the interfaces that are up.
/// </summary>
/// <returns>
/// An object with <c>status</c>, <c>isExpensive</c>, <c>isConstrained</c> and
/// <c>interfaces</c>. Enumeration failures yield an empty interface list and an availability
/// failure yields <c>unsatisfied</c>; neither is rethrown.
/// </returns>
private static object GetNetworkStatus()
{
    static string Classify(NetworkInterfaceType type)
    {
        switch (type)
        {
            case NetworkInterfaceType.Wireless80211:
                return "wifi";

            case NetworkInterfaceType.Ethernet:
            case NetworkInterfaceType.GigabitEthernet:
            case NetworkInterfaceType.FastEthernetFx:
            case NetworkInterfaceType.FastEthernetT:
                return "wired";

            case NetworkInterfaceType.Ppp:
            case NetworkInterfaceType.Wwanpp:
            case NetworkInterfaceType.Wwanpp2:
                return "cellular";

            default:
                return "other";
        }
    }

    string[] interfaces;
    try
    {
        // Keep first-seen order while dropping repeated kinds.
        var kinds = new List<string>();
        foreach (var nic in NetworkInterface.GetAllNetworkInterfaces())
        {
            if (nic.OperationalStatus != OperationalStatus.Up)
            {
                continue;
            }

            var kind = Classify(nic.NetworkInterfaceType);
            if (!kinds.Contains(kind))
            {
                kinds.Add(kind);
            }
        }

        interfaces = kinds.ToArray();
    }
    catch
    {
        interfaces = Array.Empty<string>();
    }

    bool isAvailable;
    try
    {
        isAvailable = NetworkInterface.GetIsNetworkAvailable();
    }
    catch
    {
        isAvailable = false;
    }

    return new
    {
        status = isAvailable ? "satisfied" : "unsatisfied",
        isExpensive = false,
        isConstrained = false,
        interfaces
    };
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
85
src/OpenClaw.Shared/Capabilities/LocationCapability.cs
Normal file
85
src/OpenClaw.Shared/Capabilities/LocationCapability.cs
Normal file
@ -0,0 +1,85 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// Location capability using Windows.Devices.Geolocation.
/// This class only validates and forwards the request: the actual lookup is done by whichever
/// handler is subscribed to <see cref="GetRequested"/>.
/// </summary>
public class LocationCapability : NodeCapabilityBase
{
    public override string Category => "location";

    private static readonly string[] _commands = new[] { "location.get" };

    public override IReadOnlyList<string> Commands => _commands;

    /// <summary>
    /// Handler that performs the location lookup. When nothing is subscribed,
    /// <c>location.get</c> answers with "Location not available".
    /// </summary>
    public event Func<LocationGetArgs, Task<LocationResult>>? GetRequested;

    public LocationCapability(IOpenClawLogger logger) : base(logger)
    {
    }

    /// <summary>Routes an invocation to the handler that matches its command name.</summary>
    /// <param name="request">Invocation to execute.</param>
    /// <returns>The handler's response, or an error response for an unrecognised command.</returns>
    public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
    {
        return request.Command switch
        {
            "location.get" => await HandleGetAsync(request),
            _ => Error($"Unknown command: {request.Command}")
        };
    }

    /// <summary>
    /// Handles <c>location.get</c>. Reads <c>accuracy</c> (default "default"), <c>maxAge</c>
    /// (default 30000) and <c>locationTimeout</c> (default 10000) from the arguments and
    /// forwards them to the subscribed handler.
    /// </summary>
    /// <param name="request">Invocation carrying the optional arguments.</param>
    /// <returns>
    /// A success response with latitude, longitude, accuracy and timestamp, or an error
    /// response. Exception text is logged locally and never copied into the response.
    /// </returns>
    private async Task<NodeInvokeResponse> HandleGetAsync(NodeInvokeRequest request)
    {
        var accuracy = GetStringArg(request.Args, "accuracy", "default");
        var maxAgeMs = GetIntArg(request.Args, "maxAge", 30000);
        var timeoutMs = GetIntArg(request.Args, "locationTimeout", 10000);

        Logger.Info($"location.get: accuracy={accuracy}, maxAge={maxAgeMs}, timeout={timeoutMs}");

        // Read the event once: checking the field and then invoking it again would let an
        // unsubscribe in between turn "not available" into a NullReferenceException.
        var handler = GetRequested;
        if (handler is null)
        {
            return Error("Location not available");
        }

        try
        {
            var result = await handler(new LocationGetArgs
            {
                Accuracy = accuracy ?? "default",
                MaxAgeMs = maxAgeMs,
                TimeoutMs = timeoutMs
            });

            return Success(new
            {
                latitude = result.Latitude,
                longitude = result.Longitude,
                accuracy = result.AccuracyMeters,
                timestamp = result.TimestampMs
            });
        }
        catch (UnauthorizedAccessException)
        {
            return Error("LOCATION_PERMISSION_REQUIRED");
        }
        catch (Exception ex)
        {
            Logger.Error("location.get failed", ex);
            return Error("Location failed");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Arguments passed to the <c>location.get</c> handler. The defaults match the ones the
/// capability applies when the caller omits an argument.
/// </summary>
public class LocationGetArgs
{
    /// <summary>Requested accuracy label; "default" when the caller gives none.</summary>
    public string Accuracy { get; set; } = "default";

    /// <summary>Value of the caller's <c>maxAge</c> argument (presumably milliseconds, per the name).</summary>
    public int MaxAgeMs { get; set; } = 30_000;

    /// <summary>Value of the caller's <c>locationTimeout</c> argument (presumably milliseconds, per the name).</summary>
    public int TimeoutMs { get; set; } = 10_000;
}
|
||||
|
||||
/// <summary>
/// Result produced by the <c>location.get</c> handler. The capability copies these values
/// into the response as <c>latitude</c>, <c>longitude</c>, <c>accuracy</c> and <c>timestamp</c>.
/// </summary>
public class LocationResult
{
    /// <summary>Latitude of the fix.</summary>
    public double Latitude { get; set; }

    /// <summary>Longitude of the fix.</summary>
    public double Longitude { get; set; }

    /// <summary>Accuracy of the fix (presumably meters, per the name).</summary>
    public double AccuracyMeters { get; set; }

    /// <summary>Timestamp of the fix (presumably milliseconds, per the name; epoch not shown here).</summary>
    public long TimestampMs { get; set; }
}
|
||||
@ -13,16 +13,15 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
|
||||
private static readonly string[] _commands = new[]
|
||||
{
|
||||
"screen.capture",
|
||||
"screen.list"
|
||||
// Future: "screen.record"
|
||||
"screen.snapshot",
|
||||
"screen.record"
|
||||
};
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
// Events for UI/platform-specific implementation
|
||||
public event Func<ScreenCaptureArgs, Task<ScreenCaptureResult>>? CaptureRequested;
|
||||
public event Func<Task<ScreenInfo[]>>? ListRequested;
|
||||
public event Func<ScreenRecordArgs, Task<ScreenRecordResult>>? RecordRequested;
|
||||
|
||||
public ScreenCapability(IOpenClawLogger logger) : base(logger)
|
||||
{
|
||||
@ -32,22 +31,29 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
{
|
||||
return request.Command switch
|
||||
{
|
||||
"screen.capture" => await HandleCaptureAsync(request),
|
||||
"screen.list" => await HandleListAsync(request),
|
||||
"screen.snapshot" => await HandleCaptureAsync(request),
|
||||
"screen.record" => await HandleRecordAsync(request),
|
||||
_ => Error($"Unknown command: {request.Command}")
|
||||
};
|
||||
}
|
||||
|
||||
// Clamp bounds — reject extreme caller values before any work starts.
|
||||
private const int MinDimension = 16;
|
||||
private const int MaxScreenWidth = 7680; // 8K horizontal
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
private const int MaxScreenIndex = 32; // far above any plausible monitor count
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleCaptureAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var format = GetStringArg(request.Args, "format", "png");
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1920);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1920), MinDimension, MaxScreenWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
var monitor = GetIntArg(request.Args, "monitor", 0);
|
||||
var screenIndex = GetIntArg(request.Args, "screenIndex", monitor);
|
||||
var screenIndex = Clamp(GetIntArg(request.Args, "screenIndex", monitor), 0, MaxScreenIndex);
|
||||
var includePointer = GetBoolArg(request.Args, "includePointer", true);
|
||||
|
||||
Logger.Info($"screen.capture: format={format}, maxWidth={maxWidth}, monitor={screenIndex}");
|
||||
|
||||
Logger.Info($"screen.snapshot: format={format}, maxWidth={maxWidth}, monitor={screenIndex}");
|
||||
|
||||
if (CaptureRequested == null)
|
||||
{
|
||||
@ -78,42 +84,79 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Screen capture failed", ex);
|
||||
return Error($"Capture failed: {ex.Message}");
|
||||
return Error("Capture failed");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleListAsync(NodeInvokeRequest request)
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleRecordAsync(NodeInvokeRequest request)
|
||||
{
|
||||
Logger.Info("screen.list");
|
||||
|
||||
if (ListRequested == null)
|
||||
var format = GetStringArg(request.Args, "format", "mp4");
|
||||
if (!string.IsNullOrWhiteSpace(format) &&
|
||||
!string.Equals(format, "mp4", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
return Error("Screen list not available");
|
||||
return Error("Unsupported screen recording format. Only mp4 is supported.");
|
||||
}
|
||||
|
||||
|
||||
var durationMs = Clamp(GetIntArg(request.Args, "durationMs", 10000), 100, MaxRecordDurationMs);
|
||||
var fpsRaw = GetDoubleArg(request.Args, "fps", 10);
|
||||
var fps = fpsRaw < 1 ? 1 : (fpsRaw > 60 ? 60 : fpsRaw);
|
||||
var screenIndex = Clamp(GetIntArg(request.Args, "screenIndex", 0), 0, MaxScreenIndex);
|
||||
var includeAudio = GetBoolArg(request.Args, "includeAudio", false);
|
||||
|
||||
Logger.Info($"screen.record: durationMs={durationMs}, fps={fps}, screenIndex={screenIndex}, includeAudio={includeAudio}");
|
||||
|
||||
if (RecordRequested == null)
|
||||
{
|
||||
return Error("Screen recording not available");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var screens = await ListRequested();
|
||||
var formatted = new List<object>();
|
||||
foreach (var screen in screens)
|
||||
var result = await RecordRequested(new ScreenRecordArgs
|
||||
{
|
||||
formatted.Add(new
|
||||
{
|
||||
index = screen.Index,
|
||||
name = screen.Name,
|
||||
primary = screen.IsPrimary,
|
||||
bounds = new { x = screen.X, y = screen.Y, width = screen.Width, height = screen.Height },
|
||||
workingArea = new { x = screen.WorkingX, y = screen.WorkingY, width = screen.WorkingWidth, height = screen.WorkingHeight }
|
||||
});
|
||||
}
|
||||
return Success(new { screens = formatted });
|
||||
DurationMs = durationMs,
|
||||
Fps = fps,
|
||||
ScreenIndex = screenIndex,
|
||||
Format = "mp4",
|
||||
IncludeAudio = includeAudio
|
||||
});
|
||||
|
||||
return Success(new
|
||||
{
|
||||
format = result.Format,
|
||||
base64 = result.Base64,
|
||||
durationMs = result.DurationMs,
|
||||
fps = result.Fps,
|
||||
screenIndex = result.ScreenIndex,
|
||||
hasAudio = result.HasAudio
|
||||
});
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Screen list failed", ex);
|
||||
return Error($"List failed: {ex.Message}");
|
||||
Logger.Error("Screen recording failed", ex);
|
||||
return Error("Recording failed");
|
||||
}
|
||||
}
|
||||
|
||||
private const int MaxRecordDurationMs = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
/// <summary>
/// Restricts <paramref name="value"/> to the range [<paramref name="min"/>, <paramref name="max"/>].
/// The lower bound is checked first.
/// </summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
|
||||
|
||||
/// <summary>
/// Reads a numeric argument as a <see cref="double"/>.
/// </summary>
/// <param name="args">Argument payload; only a JSON object can carry named arguments.</param>
/// <param name="name">Property name to look up.</param>
/// <param name="defaultValue">Value returned whenever the argument cannot be read.</param>
/// <returns>
/// The property's value when <paramref name="args"/> is a JSON object, the property exists,
/// is a JSON number and fits in a double; otherwise <paramref name="defaultValue"/>.
/// </returns>
private static double GetDoubleArg(System.Text.Json.JsonElement args, string name, double defaultValue)
{
    // TryGetProperty throws InvalidOperationException on anything but an object, so arrays,
    // strings, numbers, null and undefined payloads all fall back to the default here.
    if (args.ValueKind != System.Text.Json.JsonValueKind.Object)
    {
        return defaultValue;
    }

    return args.TryGetProperty(name, out var prop)
        && prop.ValueKind == System.Text.Json.JsonValueKind.Number
        && prop.TryGetDouble(out var parsed)
            ? parsed
            : defaultValue;
}
|
||||
}
|
||||
|
||||
public class ScreenCaptureArgs
|
||||
@ -133,17 +176,24 @@ public class ScreenCaptureResult
|
||||
public string Base64 { get; set; } = "";
|
||||
}
|
||||
|
||||
public class ScreenInfo
|
||||
public class ScreenRecordArgs
|
||||
{
|
||||
public int Index { get; set; }
|
||||
public string Name { get; set; } = "";
|
||||
public string Format { get; set; } = "mp4";
|
||||
public int DurationMs { get; set; } = 10000;
|
||||
public double Fps { get; set; } = 10;
|
||||
public int ScreenIndex { get; set; }
|
||||
public bool IncludeAudio { get; set; }
|
||||
}
|
||||
|
||||
public class ScreenRecordResult
|
||||
{
|
||||
public string Format { get; set; } = "mp4";
|
||||
public string Base64 { get; set; } = "";
|
||||
public int DurationMs { get; set; }
|
||||
public double Fps { get; set; }
|
||||
public int ScreenIndex { get; set; }
|
||||
public int Width { get; set; }
|
||||
public int Height { get; set; }
|
||||
public int X { get; set; }
|
||||
public int Y { get; set; }
|
||||
public int WorkingX { get; set; }
|
||||
public int WorkingY { get; set; }
|
||||
public int WorkingWidth { get; set; }
|
||||
public int WorkingHeight { get; set; }
|
||||
public bool IsPrimary { get; set; }
|
||||
public bool HasAudio { get; set; }
|
||||
}
|
||||
|
||||
|
||||
339
src/OpenClaw.Shared/Capabilities/SttCapability.cs
Normal file
339
src/OpenClaw.Shared/Capabilities/SttCapability.cs
Normal file
@ -0,0 +1,339 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Speech-to-text node capability. Three commands:
|
||||
///
|
||||
/// * <see cref="TranscribeCommand"/> — bounded fixed-duration capture + transcription.
|
||||
/// Caller must specify <c>maxDurationMs</c> (capped at <see cref="MaxTranscribeDurationMs"/>).
|
||||
/// Useful for quick "give me 5 seconds of audio" prompts.
|
||||
///
|
||||
/// * <see cref="ListenCommand"/> — VAD-driven capture that returns when speech ends
|
||||
/// or after <c>timeoutMs</c> (default <see cref="DefaultListenTimeoutMs"/>, range
|
||||
/// <see cref="MinListenTimeoutMs"/>..<see cref="MaxListenTimeoutMs"/>).
|
||||
/// Useful for conversational "listen until I stop talking" prompts.
|
||||
///
|
||||
/// * <see cref="StatusCommand"/> — reports engine readiness (no PII).
|
||||
///
|
||||
/// The actual engine lives in the tray (Whisper.net + NAudio + Silero VAD).
|
||||
/// Whisper is local-first and privacy-respecting; the legacy WinRT
|
||||
/// <c>SpeechRecognizer</c> + desktop SAPI fallback was removed because both
|
||||
/// stacks are old, can leak audio to the Microsoft cloud (online-speech),
|
||||
/// and don't work in unpackaged builds.
|
||||
///
|
||||
/// **Privacy invariants for the response surface:**
|
||||
/// - Validation errors never echo the caller-supplied language string.
|
||||
/// - Handler exceptions never propagate their <c>Message</c> into the response;
|
||||
/// full detail stays in the local logger only. This is critical because
|
||||
/// failed-invoke errors land in recent activity / support bundles.
|
||||
/// - <see cref="StatusCommand"/> response carries no PII (no transcript fragments,
|
||||
/// no language history, no device IDs, no model paths).
|
||||
/// </summary>
|
||||
public sealed class SttCapability : NodeCapabilityBase
|
||||
{
|
||||
public const string TranscribeCommand = "stt.transcribe";
|
||||
public const string ListenCommand = "stt.listen";
|
||||
public const string StatusCommand = "stt.status";
|
||||
|
||||
public const int MaxTranscribeDurationMs = 30_000;
|
||||
public const int MinListenTimeoutMs = 1_000;
|
||||
public const int MaxListenTimeoutMs = 120_000;
|
||||
public const int DefaultListenTimeoutMs = 30_000;
|
||||
|
||||
public const string DefaultLanguage = "en-US";
|
||||
public const string AutoLanguage = "auto";
|
||||
|
||||
/// <summary>
|
||||
/// Engine identifier returned in <c>engineEffective</c> on every successful
|
||||
/// stt.* response. Currently always <c>"whisper"</c>; the field exists so
|
||||
/// adding a future engine doesn't break the wire shape.
|
||||
/// </summary>
|
||||
public const string EngineWhisper = "whisper";
|
||||
|
||||
private static readonly string[] _commands = [TranscribeCommand, ListenCommand, StatusCommand];
|
||||
|
||||
// Conservative BCP-47 check: 2-3 letter language, optional script
|
||||
// (4 letter), optional region (2 letter or 3 digit), each separated
|
||||
// by a hyphen. Rejects whitespace and punctuation that would otherwise
|
||||
// trip Windows.Globalization.Language ctor. The literal "auto"
|
||||
// sentinel is accepted in addition (Whisper supports auto-detect).
|
||||
private static readonly Regex BcpTagRegex = new(
|
||||
"^[A-Za-z]{2,3}(?:-[A-Za-z]{4})?(?:-(?:[A-Za-z]{2}|[0-9]{3}))?$",
|
||||
RegexOptions.Compiled);
|
||||
|
||||
public override string Category => "stt";
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="TranscribeCommand"/>: bounded fixed-duration
|
||||
/// capture + transcription.
|
||||
/// </summary>
|
||||
public event Func<SttTranscribeArgs, CancellationToken, Task<SttTranscribeResult>>? TranscribeRequested;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="ListenCommand"/>: VAD-driven capture that
|
||||
/// returns on end-of-speech or after <c>timeoutMs</c>.
|
||||
/// </summary>
|
||||
public event Func<SttListenArgs, CancellationToken, Task<SttListenResult>>? ListenRequested;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="StatusCommand"/>: returns per-engine readiness.
|
||||
/// </summary>
|
||||
public event Func<CancellationToken, Task<SttStatusResult>>? StatusRequested;
|
||||
|
||||
public SttCapability(IOpenClawLogger logger) : base(logger) { }
|
||||
|
||||
/// <summary>
/// Trims and validates a single language tag.
/// Public so UI surfaces can validate against the same rule the wire applies.
/// </summary>
/// <param name="tag">Candidate tag; may be null or blank.</param>
/// <returns>
/// The trimmed tag when it matches the BCP-47 pattern, the <see cref="AutoLanguage"/>
/// sentinel for a case-insensitive "auto", or <c>null</c> for anything else, including
/// null or blank input.
/// </returns>
public static string? NormalizeLanguageTag(string? tag)
{
    // Null used to throw NullReferenceException from Trim(); treat it like any other
    // invalid input. Blank input already produced null because it never matches the pattern.
    if (string.IsNullOrWhiteSpace(tag))
    {
        return null;
    }

    var trimmed = tag.Trim();
    if (string.Equals(trimmed, AutoLanguage, StringComparison.OrdinalIgnoreCase))
    {
        return AutoLanguage;
    }

    return BcpTagRegex.IsMatch(trimmed) ? trimmed : null;
}
|
||||
|
||||
/// <summary>
/// Picks the language for a recognition call. A non-blank per-call value wins, then a
/// non-blank configured value, then <see cref="DefaultLanguage"/>.
/// </summary>
/// <param name="requested">Language supplied with the call, if any.</param>
/// <param name="configured">Language from settings, if any.</param>
/// <returns>The normalized tag, or <c>null</c> when the chosen value fails validation.</returns>
public static string? ResolveLanguage(string? requested, string? configured)
{
    string candidate;
    if (!string.IsNullOrWhiteSpace(requested))
    {
        candidate = requested;
    }
    else if (!string.IsNullOrWhiteSpace(configured))
    {
        candidate = configured;
    }
    else
    {
        candidate = DefaultLanguage;
    }

    return NormalizeLanguageTag(candidate);
}
|
||||
|
||||
/// <summary>
/// Executes a request without cancellation support by forwarding to the cancellable
/// overload with <see cref="CancellationToken.None"/>.
/// </summary>
public override Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
{
    return ExecuteAsync(request, CancellationToken.None);
}
|
||||
|
||||
/// <summary>
/// Routes an <c>stt.*</c> invocation to the handler that matches its command name.
/// </summary>
/// <param name="request">Invocation to execute.</param>
/// <param name="cancellationToken">Token passed through to the selected handler.</param>
/// <returns>The handler's response, or an error response for an unrecognised command.</returns>
public override async Task<NodeInvokeResponse> ExecuteAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    switch (request.Command)
    {
        case TranscribeCommand:
            return await HandleTranscribeAsync(request, cancellationToken).ConfigureAwait(false);

        case ListenCommand:
            return await HandleListenAsync(request, cancellationToken).ConfigureAwait(false);

        case StatusCommand:
            return await HandleStatusAsync(cancellationToken).ConfigureAwait(false);

        default:
            return Error($"Unknown command: {request.Command}");
    }
}
|
||||
|
||||
/// <summary>
/// Handles <see cref="TranscribeCommand"/>: validates <c>maxDurationMs</c> and the optional
/// <c>language</c>, then forwards to the <see cref="TranscribeRequested"/> handler.
/// </summary>
/// <param name="request">Invocation carrying the arguments.</param>
/// <param name="cancellationToken">Token passed to the handler.</param>
/// <returns>
/// A success response with the transcription fields, or an error response carrying a fixed
/// message. Exception text is logged locally and never copied into the response.
/// </returns>
private async Task<NodeInvokeResponse> HandleTranscribeAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    // maxDurationMs is required and bounded server-side. We deliberately
    // reject 0/negative rather than substituting a default — callers
    // explicitly choose how much mic time they're spending.
    var maxDurationMs = GetIntArg(request.Args, "maxDurationMs", 0);
    if (maxDurationMs <= 0)
        return Error("Missing required maxDurationMs");
    if (maxDurationMs > MaxTranscribeDurationMs)
        return Error($"maxDurationMs exceeds {MaxTranscribeDurationMs} ms");

    var requestedLanguage = GetStringArg(request.Args, "language");
    string? resolvedLanguage = null;
    if (!string.IsNullOrWhiteSpace(requestedLanguage))
    {
        resolvedLanguage = NormalizeLanguageTag(requestedLanguage);
        if (resolvedLanguage == null)
            return Error("Invalid language tag"); // never echo the caller-supplied string
    }

    // Read the event once: checking the field and then invoking it again would let an
    // unsubscribe in between surface as a NullReferenceException ("Transcribe failed")
    // instead of "not available".
    var handler = TranscribeRequested;
    if (handler == null)
        return Error("STT transcribe not available");

    var args = new SttTranscribeArgs
    {
        MaxDurationMs = maxDurationMs,
        Language = resolvedLanguage // null lets the tray fall back to its configured setting
    };

    Logger.Info($"stt.transcribe: maxDurationMs={args.MaxDurationMs}, language={args.Language ?? "(default)"}");

    try
    {
        var result = await handler(args, cancellationToken).ConfigureAwait(false);
        return Success(new
        {
            transcribed = result.Transcribed,
            text = result.Text,
            durationMs = result.DurationMs,
            language = result.Language,
            engineEffective = result.EngineEffective
        });
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        return Error("Transcribe canceled");
    }
    catch (Exception ex)
    {
        // Privacy: never echo raw exception text into the response. The
        // exception flows through the failed-invoke path and may be
        // persisted to recent activity / support bundles. Full detail
        // stays in the local log only.
        Logger.Error("STT transcribe failed", ex);
        return Error("Transcribe failed");
    }
}
|
||||
|
||||
/// <summary>
/// Handles <see cref="ListenCommand"/>: clamps <c>timeoutMs</c>, validates the optional
/// <c>language</c> (defaulting to <see cref="AutoLanguage"/>), then forwards to the
/// <see cref="ListenRequested"/> handler.
/// </summary>
/// <param name="request">Invocation carrying the arguments.</param>
/// <param name="cancellationToken">Token passed to the handler.</param>
/// <returns>
/// A success response with text, language, duration, segments and engine, or an error
/// response carrying a fixed message. Exception text stays in the local log.
/// </returns>
private async Task<NodeInvokeResponse> HandleListenAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    // timeoutMs is optional with a sane default; bounded both ways so
    // a hostile caller can't pin the mic open for an hour.
    var timeoutMs = Math.Clamp(
        GetIntArg(request.Args, "timeoutMs", DefaultListenTimeoutMs),
        MinListenTimeoutMs,
        MaxListenTimeoutMs);

    var requestedLanguage = GetStringArg(request.Args, "language");
    string resolvedLanguage = AutoLanguage;
    if (!string.IsNullOrWhiteSpace(requestedLanguage))
    {
        var normalized = NormalizeLanguageTag(requestedLanguage);
        if (normalized == null)
            return Error("Invalid language tag"); // never echo the caller-supplied string
        resolvedLanguage = normalized;
    }

    // Read the event once: checking the field and then invoking it again would let an
    // unsubscribe in between surface as a NullReferenceException ("Listen failed")
    // instead of "not available".
    var handler = ListenRequested;
    if (handler == null)
        return Error("STT listen not available");

    var args = new SttListenArgs
    {
        TimeoutMs = timeoutMs,
        Language = resolvedLanguage
    };

    Logger.Info($"stt.listen: timeoutMs={timeoutMs}, language={resolvedLanguage}");

    try
    {
        var result = await handler(args, cancellationToken).ConfigureAwait(false);
        return Success(new
        {
            text = result.Text,
            language = result.Language,
            durationMs = result.DurationMs,
            segments = result.Segments,
            engineEffective = result.EngineEffective
        });
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        return Error("Listen canceled");
    }
    catch (Exception ex)
    {
        // Same privacy invariant as Transcribe: fixed message out, full detail in the log.
        Logger.Error("STT listen failed", ex);
        return Error("Listen failed");
    }
}
|
||||
|
||||
/// <summary>
/// Handles <see cref="StatusCommand"/> by forwarding to the <see cref="StatusRequested"/>
/// handler and copying its readiness fields into the response.
/// </summary>
/// <param name="cancellationToken">Token passed to the handler.</param>
/// <returns>
/// A success response with engine, readiness, download progress and the two capability
/// flags, or an error response carrying a fixed message.
/// </returns>
private async Task<NodeInvokeResponse> HandleStatusAsync(CancellationToken cancellationToken)
{
    // Read the event once: checking the field and then invoking it again would let an
    // unsubscribe in between surface as a NullReferenceException ("Status failed")
    // instead of "not available".
    var handler = StatusRequested;
    if (handler == null)
        return Error("STT status not available");

    try
    {
        var result = await handler(cancellationToken).ConfigureAwait(false);
        return Success(new
        {
            engine = result.Engine,
            readiness = result.Readiness,
            modelDownloadProgress = result.ModelDownloadProgress,
            isListenWithVadSupported = result.IsListenWithVadSupported,
            isBoundedTranscribeSupported = result.IsBoundedTranscribeSupported
        });
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Matches the transcribe/listen handlers: a caller-requested cancellation is not
        // a failure and should not be written to the error log.
        return Error("Status canceled");
    }
    catch (Exception ex)
    {
        // Status must not leak engine internals; carry only a fixed message.
        Logger.Error("STT status failed", ex);
        return Error("Status failed");
    }
}
|
||||
}
|
||||
|
||||
/// <summary>Arguments handed to the <c>stt.transcribe</c> handler.</summary>
public sealed class SttTranscribeArgs
{
    /// <summary>Validated capture length taken from the caller's <c>maxDurationMs</c> argument.</summary>
    public int MaxDurationMs { get; set; }

    /// <summary>
    /// A BCP-47 tag such as "en-US", the literal "auto" sentinel, or <c>null</c> so the
    /// tray falls back to its configured <c>SttLanguage</c> setting.
    /// </summary>
    public string? Language { get; set; }
}
|
||||
|
||||
/// <summary>
/// Result returned by the <c>stt.transcribe</c> handler; every property is copied into the
/// response.
/// </summary>
public sealed class SttTranscribeResult
{
    /// <summary>Reported to the caller as <c>transcribed</c>.</summary>
    public bool Transcribed { get; set; }

    /// <summary>Transcript text; empty by default.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Reported to the caller as <c>durationMs</c>.</summary>
    public int DurationMs { get; set; }

    /// <summary>Language reported to the caller; starts as <see cref="SttCapability.DefaultLanguage"/>.</summary>
    public string Language { get; set; } = SttCapability.DefaultLanguage;

    /// <summary>
    /// Engine that served the call. Always <see cref="SttCapability.EngineWhisper"/> today;
    /// the field exists so a future engine does not change the wire shape.
    /// </summary>
    public string EngineEffective { get; set; } = SttCapability.EngineWhisper;
}
|
||||
|
||||
/// <summary>Arguments handed to the <c>stt.listen</c> handler.</summary>
public sealed class SttListenArgs
{
    /// <summary>Listen timeout after the capability has clamped it to its allowed range.</summary>
    public int TimeoutMs { get; set; }

    /// <summary>
    /// A BCP-47 tag such as "en-US", or the literal "auto" sentinel, which is the default
    /// and lets Whisper auto-detect.
    /// </summary>
    public string Language { get; set; } = SttCapability.AutoLanguage;
}
|
||||
|
||||
/// <summary>
/// Mutable result bag for a listen call: overall text, language, duration, the list of
/// segments and the engine identifier.
/// </summary>
public sealed class SttListenResult
{
    /// <summary>Transcript text; empty string until assigned.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Language value; starts out as <see cref="SttCapability.AutoLanguage"/>.</summary>
    public string Language { get; set; } = SttCapability.AutoLanguage;

    public int DurationMs { get; set; }

    /// <summary>Segment list; an empty array until assigned, never null by default.</summary>
    public IReadOnlyList<SttSegment> Segments { get; set; } = Array.Empty<SttSegment>();

    /// <summary>Engine identifier; starts out as <see cref="SttCapability.EngineWhisper"/>.</summary>
    public string EngineEffective { get; set; } = SttCapability.EngineWhisper;
}
|
||||
|
||||
/// <summary>
/// One entry of <c>SttListenResult.Segments</c>: a piece of text with start and end
/// offsets expressed in milliseconds.
/// </summary>
public sealed class SttSegment
{
    /// <summary>Segment text; empty string until assigned.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Start offset in milliseconds.</summary>
    public int StartMs { get; set; }

    /// <summary>End offset in milliseconds.</summary>
    public int EndMs { get; set; }
}
|
||||
|
||||
/// <summary>
/// Mutable status bag returned by the STT status handler: engine identifier, readiness
/// state, optional model download progress and two support flags.
/// </summary>
public sealed class SttStatusResult
{
    /// <summary>Engine identifier; starts out as <see cref="SttCapability.EngineWhisper"/>.</summary>
    public string Engine { get; set; } = SttCapability.EngineWhisper;

    /// <summary>
    /// Readiness state. One of "ready", "initializing", "model-downloading",
    /// "model-not-downloaded", "unavailable"; defaults to "unavailable".
    /// </summary>
    public string Readiness { get; set; } = "unavailable";

    /// <summary>
    /// Download progress in the range 0..1 while <see cref="Readiness"/> is
    /// "model-downloading"; null in every other state.
    /// </summary>
    public double? ModelDownloadProgress { get; set; }

    public bool IsListenWithVadSupported { get; set; }

    public bool IsBoundedTranscribeSupported { get; set; }
}
|
||||
@ -1,8 +1,9 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
@ -12,7 +13,10 @@ namespace OpenClaw.Shared.Capabilities;
|
||||
public class SystemCapability : NodeCapabilityBase
|
||||
{
|
||||
public override string Category => "system";
|
||||
|
||||
|
||||
private const int DefaultRunTimeoutMs = 30_000;
|
||||
private const int MaxRunTimeoutMs = 600_000; // 10 minutes
|
||||
|
||||
private static readonly string[] _commands = new[]
|
||||
{
|
||||
"system.notify",
|
||||
@ -22,6 +26,26 @@ public class SystemCapability : NodeCapabilityBase
|
||||
"system.execApprovals.get",
|
||||
"system.execApprovals.set"
|
||||
};
|
||||
|
||||
private static readonly string[] DangerousAllowPatternFragments =
|
||||
[
|
||||
"remove-item",
|
||||
"rm ",
|
||||
"del ",
|
||||
"erase ",
|
||||
"rd ",
|
||||
"rmdir ",
|
||||
"format-",
|
||||
"stop-computer",
|
||||
"restart-computer",
|
||||
"shutdown",
|
||||
"invoke-webrequest",
|
||||
"invoke-restmethod",
|
||||
"start-process",
|
||||
"set-executionpolicy",
|
||||
"reg ",
|
||||
"net "
|
||||
];
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
@ -30,9 +54,13 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
// Command runner for system.run (swappable: local, docker, wsl)
|
||||
private ICommandRunner? _commandRunner;
|
||||
|
||||
|
||||
// Exec approval policy (optional - if null, all commands are allowed)
|
||||
private ExecApprovalPolicy? _approvalPolicy;
|
||||
private IExecApprovalPromptHandler? _promptHandler;
|
||||
|
||||
// V2 exec approval handler (null = legacy path; inert until explicitly set)
|
||||
private IExecApprovalV2Handler? _v2Handler;
|
||||
|
||||
public SystemCapability(IOpenClawLogger logger) : base(logger)
|
||||
{
|
||||
@ -53,6 +81,20 @@ public class SystemCapability : NodeCapabilityBase
|
||||
{
|
||||
_approvalPolicy = policy;
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores the prompt handler that <c>EnsureApprovedAsync</c> consults when a policy
/// evaluation asks for an interactive prompt.
/// </summary>
/// <param name="promptHandler">Handler to store; replaces any previously stored one.</param>
public void SetPromptHandler(IExecApprovalPromptHandler promptHandler) =>
    _promptHandler = promptHandler;
|
||||
|
||||
/// <summary>
/// Installs the V2 exec approval handler. Once a handler is present, system.run is
/// routed to the V2 path rather than the legacy one; until this method has been called
/// the V2 path stays inert.
/// </summary>
/// <param name="handler">Handler to store; replaces any previously stored one.</param>
public void SetV2Handler(IExecApprovalV2Handler handler) =>
    _v2Handler = handler;
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
@ -91,25 +133,11 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
private NodeInvokeResponse HandleWhich(NodeInvokeRequest request)
|
||||
{
|
||||
var bins = new List<string>();
|
||||
if (request.Args.ValueKind != System.Text.Json.JsonValueKind.Undefined &&
|
||||
request.Args.TryGetProperty("bins", out var binsEl) &&
|
||||
binsEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
{
|
||||
foreach (var item in binsEl.EnumerateArray())
|
||||
{
|
||||
if (item.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
{
|
||||
var bin = item.GetString()?.Trim();
|
||||
if (!string.IsNullOrEmpty(bin))
|
||||
bins.Add(bin);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bins.Count == 0)
|
||||
var bins = GetStringArrayArg(request.Args, "bins");
|
||||
|
||||
if (bins.Length == 0)
|
||||
return Error("Missing bins parameter");
|
||||
|
||||
|
||||
var found = new Dictionary<string, string>();
|
||||
foreach (var bin in bins)
|
||||
{
|
||||
@ -117,8 +145,8 @@ public class SystemCapability : NodeCapabilityBase
|
||||
if (resolved != null)
|
||||
found[bin] = resolved;
|
||||
}
|
||||
|
||||
Logger.Info($"system.which: queried {bins.Count} bins, found {found.Count}");
|
||||
|
||||
Logger.Info($"system.which: queried {bins.Length} bins, found {found.Count}");
|
||||
return Success(new { bins = found });
|
||||
}
|
||||
|
||||
@ -136,8 +164,8 @@ public class SystemCapability : NodeCapabilityBase
|
||||
if (OperatingSystem.IsWindows())
|
||||
{
|
||||
var pathext = Environment.GetEnvironmentVariable("PATHEXT") ?? ".EXE;.CMD;.BAT;.COM";
|
||||
extensions.AddRange(pathext.Split(';', StringSplitOptions.RemoveEmptyEntries)
|
||||
.Select(e => e.ToLowerInvariant()));
|
||||
foreach (var e in pathext.Split(';', StringSplitOptions.RemoveEmptyEntries))
|
||||
extensions.Add(e.ToLowerInvariant());
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -162,45 +190,51 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
private static string FormatExecCommand(string[] argv) => ShellQuoting.FormatExecCommand(argv);
|
||||
|
||||
/// <summary>
/// Parses the JSON "command" property as either a string array or a plain string.
/// </summary>
/// <param name="requestArgs">
/// Argument element of the request. Anything other than a JSON object (undefined,
/// null, array, scalar) is treated as "no command".
/// </param>
/// <returns>
/// The argv array with the command as first element, or null when the arguments are not
/// an object, "command" is missing, the array contains no string items, or "command" is
/// neither an array nor a string. Non-string array items are skipped, not rejected.
/// </returns>
private static string[]? TryParseArgv(System.Text.Json.JsonElement requestArgs)
{
    // JsonElement.TryGetProperty throws InvalidOperationException for every ValueKind
    // except Object, so guarding on Undefined alone lets a null/array/scalar payload
    // escape as an exception instead of the intended "missing command" result.
    if (requestArgs.ValueKind != System.Text.Json.JsonValueKind.Object ||
        !requestArgs.TryGetProperty("command", out var cmdEl))
    {
        return null;
    }

    if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.Array)
    {
        var list = new List<string>();
        foreach (var item in cmdEl.EnumerateArray())
        {
            if (item.ValueKind == System.Text.Json.JsonValueKind.String)
                list.Add(item.GetString() ?? "");
        }

        return list.Count > 0 ? list.ToArray() : null;
    }

    if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.String)
    {
        var command = cmdEl.GetString();
        return command != null ? new[] { command } : null;
    }

    return null;
}
|
||||
|
||||
/// <summary>
|
||||
/// Pre-flight for system.run: echoes back the execution plan without running anything.
|
||||
/// The gateway uses this to build its approval context before the actual run.
|
||||
/// </summary>
|
||||
private NodeInvokeResponse HandleRunPrepare(NodeInvokeRequest request)
|
||||
{
|
||||
string? command = null;
|
||||
string[]? argv = null;
|
||||
string? rawCommand = null;
|
||||
string? cwd = null;
|
||||
|
||||
if (request.Args.ValueKind != System.Text.Json.JsonValueKind.Undefined &&
|
||||
request.Args.TryGetProperty("command", out var cmdEl))
|
||||
{
|
||||
if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
{
|
||||
var list = new List<string>();
|
||||
foreach (var item in cmdEl.EnumerateArray())
|
||||
{
|
||||
if (item.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
list.Add(item.GetString() ?? "");
|
||||
}
|
||||
argv = list.ToArray();
|
||||
command = argv.Length > 0 ? argv[0] : null;
|
||||
}
|
||||
else if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
{
|
||||
command = cmdEl.GetString();
|
||||
argv = command != null ? new[] { command } : null;
|
||||
}
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(command) || argv == null || argv.Length == 0)
|
||||
var argv = TryParseArgv(request.Args);
|
||||
if (argv == null || argv.Length == 0 || string.IsNullOrWhiteSpace(argv[0]))
|
||||
{
|
||||
return Error("Missing command parameter");
|
||||
}
|
||||
|
||||
rawCommand = GetStringArg(request.Args, "rawCommand");
|
||||
cwd = GetStringArg(request.Args, "cwd");
|
||||
var command = argv[0];
|
||||
var rawCommand = GetStringArg(request.Args, "rawCommand");
|
||||
var cwd = GetStringArg(request.Args, "cwd");
|
||||
var agentId = GetStringArg(request.Args, "agentId");
|
||||
var sessionKey = GetStringArg(request.Args, "sessionKey");
|
||||
|
||||
@ -222,6 +256,34 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleRunAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var correlationId = Guid.NewGuid().ToString("N")[..8];
|
||||
|
||||
// Routing seam (rail 2): select path, delegate — no approval logic here.
|
||||
if (_v2Handler != null)
|
||||
{
|
||||
Logger.Info($"[system.run] corr={correlationId} path=v2");
|
||||
ExecApprovalV2Result v2Result;
|
||||
try
|
||||
{
|
||||
v2Result = await _v2Handler.HandleAsync(request, correlationId);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Rail 1: no silent fallback — handler exceptions become typed denies.
|
||||
Logger.Error($"[system.run] corr={correlationId} path=v2 handler threw", ex);
|
||||
v2Result = ExecApprovalV2Result.ValidationFailed("Handler exception");
|
||||
}
|
||||
|
||||
Logger.Info($"[system.run] corr={correlationId} decision={v2Result.Code} reason={v2Result.Reason}");
|
||||
// Rail 1: no silent fallback to legacy regardless of result code.
|
||||
// In PR1 only ExecApprovalV2NullHandler exists (always unavailable); the real
|
||||
// coordinator that can produce an allow decision is wired in PR7/PR8.
|
||||
return Error($"exec-approvals-v2: {v2Result.Code} ({v2Result.Reason})");
|
||||
}
|
||||
|
||||
// Legacy path — untouched (rail 3).
|
||||
Logger.Info($"[system.run] corr={correlationId} path=legacy decision=legacy reason=legacy");
|
||||
|
||||
if (_commandRunner == null)
|
||||
{
|
||||
return Error("Command execution not available");
|
||||
@ -229,44 +291,22 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
// Per OpenClaw spec, "command" is an argv array (e.g. ["echo","Hello"]).
|
||||
// Also accept a plain string for backward compatibility.
|
||||
string? command = null;
|
||||
string[]? args = null;
|
||||
var argv = TryParseArgv(request.Args);
|
||||
string? command = argv?[0];
|
||||
string[]? args = argv?.Length > 1 ? argv[1..] : null;
|
||||
|
||||
if (request.Args.ValueKind != System.Text.Json.JsonValueKind.Undefined &&
|
||||
request.Args.TryGetProperty("command", out var cmdEl))
|
||||
// When command is a string, also check for separate "args" array
|
||||
if (argv?.Length == 1 && request.Args.TryGetProperty("args", out var argsEl) &&
|
||||
argsEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
{
|
||||
if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
var list = new List<string>();
|
||||
foreach (var item in argsEl.EnumerateArray())
|
||||
{
|
||||
var argv = new List<string>();
|
||||
foreach (var item in cmdEl.EnumerateArray())
|
||||
{
|
||||
if (item.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
argv.Add(item.GetString() ?? "");
|
||||
}
|
||||
if (argv.Count > 0)
|
||||
{
|
||||
command = argv[0];
|
||||
args = argv.Count > 1 ? argv.Skip(1).ToArray() : null;
|
||||
}
|
||||
}
|
||||
else if (cmdEl.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
{
|
||||
command = cmdEl.GetString();
|
||||
|
||||
// When command is a string, also check for separate "args" array
|
||||
if (request.Args.TryGetProperty("args", out var argsEl) &&
|
||||
argsEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
{
|
||||
var list = new List<string>();
|
||||
foreach (var item in argsEl.EnumerateArray())
|
||||
{
|
||||
if (item.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
list.Add(item.GetString() ?? "");
|
||||
}
|
||||
if (list.Count > 0)
|
||||
args = list.ToArray();
|
||||
}
|
||||
if (item.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
list.Add(item.GetString() ?? "");
|
||||
}
|
||||
if (list.Count > 0)
|
||||
args = list.ToArray();
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(command))
|
||||
@ -276,8 +316,15 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
var shell = GetStringArg(request.Args, "shell");
|
||||
var cwd = GetStringArg(request.Args, "cwd");
|
||||
var timeoutMs = GetIntArg(request.Args, "timeoutMs",
|
||||
GetIntArg(request.Args, "timeout", 30000));
|
||||
var timeoutMs = GetIntArg(request.Args, "timeoutMs",
|
||||
GetIntArg(request.Args, "timeout", DefaultRunTimeoutMs));
|
||||
// Clamp caller-supplied timeouts. timeoutMs <= 0 historically meant
|
||||
// "wait forever" inside LocalCommandRunner; that lets a wedged process
|
||||
// pin a handler slot indefinitely, so we coerce to the default. The
|
||||
// upper bound is generous but prevents a multi-day timeout request
|
||||
// from accidentally outliving the tray.
|
||||
if (timeoutMs <= 0) timeoutMs = DefaultRunTimeoutMs;
|
||||
if (timeoutMs > MaxRunTimeoutMs) timeoutMs = MaxRunTimeoutMs;
|
||||
|
||||
// Parse env dict if present
|
||||
Dictionary<string, string>? env = null;
|
||||
@ -285,20 +332,31 @@ public class SystemCapability : NodeCapabilityBase
|
||||
request.Args.TryGetProperty("env", out var envEl) &&
|
||||
envEl.ValueKind == System.Text.Json.JsonValueKind.Object)
|
||||
{
|
||||
env = new Dictionary<string, string>();
|
||||
env = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var prop in envEl.EnumerateObject())
|
||||
{
|
||||
if (prop.Value.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
env[prop.Name] = prop.Value.GetString() ?? "";
|
||||
}
|
||||
}
|
||||
|
||||
var envResult = ExecEnvSanitizer.Sanitize(env);
|
||||
if (envResult.Blocked.Length > 0)
|
||||
{
|
||||
var blockedNames = (string[])envResult.Blocked.Clone();
|
||||
Array.Sort(blockedNames, StringComparer.OrdinalIgnoreCase);
|
||||
var blockedList = string.Join(", ", blockedNames);
|
||||
Logger.Warn($"system.run DENIED: blocked environment overrides [{blockedList}]");
|
||||
return Error($"Unsafe environment variable override blocked: {blockedList}");
|
||||
}
|
||||
env = envResult.Allowed;
|
||||
|
||||
// Build the full command string for policy evaluation and logging.
|
||||
// When command arrives as an argv array, we must evaluate the entire
|
||||
// command line — not just argv[0] — so policy rules like "rm *" correctly
|
||||
// match "rm -rf /".
|
||||
var fullCommand = args != null
|
||||
? FormatExecCommand(new[] { command }.Concat(args).ToArray())
|
||||
var fullCommand = args != null
|
||||
? FormatExecCommand([command!, ..args])
|
||||
: command;
|
||||
|
||||
Logger.Info($"system.run: {fullCommand} (shell={shell ?? "auto"}, timeout={timeoutMs}ms)");
|
||||
@ -307,11 +365,28 @@ public class SystemCapability : NodeCapabilityBase
|
||||
if (_approvalPolicy != null)
|
||||
{
|
||||
var approval = _approvalPolicy.Evaluate(fullCommand, shell);
|
||||
if (!approval.Allowed)
|
||||
if (!await EnsureApprovedAsync(fullCommand, shell, approval))
|
||||
{
|
||||
Logger.Warn($"system.run DENIED: {fullCommand} ({approval.Reason})");
|
||||
return Error($"Command denied by exec policy: {approval.Reason}");
|
||||
}
|
||||
|
||||
var parseResult = ExecShellWrapperParser.Expand(fullCommand, shell);
|
||||
if (!string.IsNullOrWhiteSpace(parseResult.Error))
|
||||
{
|
||||
Logger.Warn($"system.run DENIED: {fullCommand} ({parseResult.Error})");
|
||||
return Error($"Command denied by exec policy: {parseResult.Error}");
|
||||
}
|
||||
|
||||
foreach (var target in parseResult.Targets)
|
||||
{
|
||||
var innerApproval = _approvalPolicy.Evaluate(target.Command, target.Shell);
|
||||
if (!await EnsureApprovedAsync(target.Command, target.Shell, innerApproval))
|
||||
{
|
||||
Logger.Warn($"system.run DENIED: {target.Command} ({innerApproval.Reason})");
|
||||
return Error($"Command denied by exec policy: {innerApproval.Reason}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
@ -338,9 +413,62 @@ public class SystemCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("system.run failed", ex);
|
||||
return Error($"Execution failed: {ex.Message}");
|
||||
return Error("Execution failed");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Turns a policy evaluation into a final yes/no, asking the prompt handler when the
/// policy requested a prompt.
/// </summary>
/// <param name="command">Command text that was evaluated; also shown in the prompt and logs.</param>
/// <param name="shell">Shell associated with the command, or null/blank when none.</param>
/// <param name="approval">Result of evaluating <paramref name="command"/> against the policy.</param>
/// <param name="cancellationToken">Token forwarded to the prompt handler.</param>
/// <returns>
/// True when the policy already allowed the command or the prompt decision was anything
/// other than Deny; false when the policy did not ask for a prompt, no prompt handler or
/// policy is installed, or the prompt decision was Deny.
/// </returns>
private async Task<bool> EnsureApprovedAsync(
    string command,
    string? shell,
    ExecApprovalResult approval,
    CancellationToken cancellationToken = default)
{
    if (approval.Allowed)
        return true;

    // Only a Prompt outcome can be upgraded, and only when both collaborators exist.
    if (approval.Action != ExecApprovalAction.Prompt)
        return false;
    if (_promptHandler == null || _approvalPolicy == null)
        return false;

    var promptRequest = new ExecApprovalPromptRequest
    {
        Command = command,
        Shell = shell,
        MatchedPattern = approval.MatchedPattern,
        Reason = approval.Reason ?? "Command requires approval"
    };
    var decision = await _promptHandler.RequestAsync(promptRequest, cancellationToken);

    if (decision.Kind == ExecApprovalPromptDecisionKind.Deny)
    {
        Logger.Warn($"system.run DENIED by prompt: {command} ({decision.Reason})");
        return false;
    }

    if (decision.Kind == ExecApprovalPromptDecisionKind.AlwaysAllow)
    {
        if (!CanPersistExactAllowRule(command))
        {
            // Commands containing wildcard characters are approved for this run only.
            Logger.Warn($"system.run prompt could not persist wildcard command; allowing once only: {command}");
        }
        else
        {
            // Insert at index 0 so the exact rule is placed ahead of the existing rules.
            _approvalPolicy.InsertRule(0, new ExecApprovalRule
            {
                Pattern = command,
                Action = ExecApprovalAction.Allow,
                Shells = string.IsNullOrWhiteSpace(shell) ? null : [shell],
                Description = "Approved from Windows tray prompt"
            });
            Logger.Info($"system.run prompt persisted exact allow rule: {command}");
        }
    }

    Logger.Info($"system.run APPROVED by prompt: {command} ({decision.Kind})");
    return true;
}
|
||||
|
||||
/// <summary>
/// Decides whether a command may be stored verbatim as an allow rule pattern.
/// </summary>
/// <param name="command">Command text that was approved.</param>
/// <returns>
/// False for null, empty or whitespace-only text and for text containing '*' or '?';
/// true otherwise.
/// </returns>
private static bool CanPersistExactAllowRule(string command)
{
    if (string.IsNullOrWhiteSpace(command))
        return false;

    // '*' and '?' would be read as wildcards if the text became a rule pattern.
    return command.IndexOfAny(new[] { '*', '?' }) == -1;
}
|
||||
|
||||
private NodeInvokeResponse HandleExecApprovalsGet()
|
||||
{
|
||||
@ -350,18 +478,36 @@ public class SystemCapability : NodeCapabilityBase
|
||||
}
|
||||
|
||||
var data = _approvalPolicy.GetPolicyData();
|
||||
return Success(new
|
||||
var policyHash = _approvalPolicy.GetPolicyHash();
|
||||
var rules = data.Rules;
|
||||
var rulesSummary = new object[rules.Count];
|
||||
for (var i = 0; i < rules.Count; i++)
|
||||
{
|
||||
enabled = true,
|
||||
defaultAction = data.DefaultAction.ToString().ToLowerInvariant(),
|
||||
rules = data.Rules.Select(r => new
|
||||
var r = rules[i];
|
||||
rulesSummary[i] = new
|
||||
{
|
||||
pattern = r.Pattern,
|
||||
action = r.Action.ToString().ToLowerInvariant(),
|
||||
shells = r.Shells,
|
||||
description = r.Description,
|
||||
enabled = r.Enabled
|
||||
}).ToArray()
|
||||
};
|
||||
}
|
||||
|
||||
return Success(new
|
||||
{
|
||||
enabled = true,
|
||||
hash = policyHash,
|
||||
baseHash = policyHash,
|
||||
defaultAction = data.DefaultAction.ToString().ToLowerInvariant(),
|
||||
constraints = new
|
||||
{
|
||||
baseHashRequired = true,
|
||||
defaultAllowAllowed = false,
|
||||
broadAllowRulesAllowed = false,
|
||||
dangerousAllowRulesAllowed = false
|
||||
},
|
||||
rules = rulesSummary
|
||||
});
|
||||
}
|
||||
|
||||
@ -374,6 +520,19 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
try
|
||||
{
|
||||
var currentHash = _approvalPolicy.GetPolicyHash();
|
||||
if (!TryGetBaseHash(request.Args, out var baseHash))
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: baseHash is required");
|
||||
return Error("baseHash is required for exec approval policy updates. Refresh policy and retry.");
|
||||
}
|
||||
|
||||
if (!HashesMatch(baseHash, currentHash))
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: stale baseHash");
|
||||
return Error("Exec approval policy changed since it was loaded. Refresh policy and retry.");
|
||||
}
|
||||
|
||||
// Parse rules from args
|
||||
var rules = new List<ExecApprovalRule>();
|
||||
|
||||
@ -402,15 +561,18 @@ public class SystemCapability : NodeCapabilityBase
|
||||
if (ruleEl.TryGetProperty("description", out var descEl) && descEl.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
rule.Description = descEl.GetString();
|
||||
|
||||
if (ruleEl.TryGetProperty("enabled", out var enEl) && enEl.ValueKind == System.Text.Json.JsonValueKind.True || enEl.ValueKind == System.Text.Json.JsonValueKind.False)
|
||||
if (ruleEl.TryGetProperty("enabled", out var enEl) && (enEl.ValueKind == System.Text.Json.JsonValueKind.True || enEl.ValueKind == System.Text.Json.JsonValueKind.False))
|
||||
rule.Enabled = enEl.GetBoolean();
|
||||
|
||||
if (ruleEl.TryGetProperty("shells", out var shellsEl) && shellsEl.ValueKind == System.Text.Json.JsonValueKind.Array)
|
||||
{
|
||||
rule.Shells = shellsEl.EnumerateArray()
|
||||
.Where(s => s.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
.Select(s => s.GetString() ?? "")
|
||||
.ToArray();
|
||||
var shellsList = new List<string>(shellsEl.GetArrayLength());
|
||||
foreach (var s in shellsEl.EnumerateArray())
|
||||
{
|
||||
if (s.ValueKind == System.Text.Json.JsonValueKind.String)
|
||||
shellsList.Add(s.GetString() ?? "");
|
||||
}
|
||||
rule.Shells = shellsList.ToArray();
|
||||
}
|
||||
|
||||
rules.Add(rule);
|
||||
@ -429,18 +591,115 @@ public class SystemCapability : NodeCapabilityBase
|
||||
_ => ExecApprovalAction.Deny
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
if (defaultAction == ExecApprovalAction.Allow)
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: default allow is not permitted");
|
||||
return Error("Default allow is not permitted for remote exec approval policy updates.");
|
||||
}
|
||||
|
||||
var validationError = ValidateExecApprovalRules(rules);
|
||||
if (validationError != null)
|
||||
{
|
||||
Logger.Warn($"execApprovals.set denied: {validationError}");
|
||||
return Error(validationError);
|
||||
}
|
||||
|
||||
_approvalPolicy.SetRules(rules, defaultAction);
|
||||
var newHash = _approvalPolicy.GetPolicyHash();
|
||||
Logger.Info($"Exec approval policy updated: {rules.Count} rules");
|
||||
|
||||
return Success(new { updated = true, ruleCount = rules.Count });
|
||||
|
||||
return Success(new { updated = true, ruleCount = rules.Count, hash = newHash, baseHash = newHash });
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("execApprovals.set failed", ex);
|
||||
return Error($"Failed to update policy: {ex.Message}");
|
||||
return Error("Failed to update policy");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Screens a rule set for allow rules that are empty, overly broad, or that mention a
/// fragment from <c>DangerousAllowPatternFragments</c>. Rules whose action is not Allow
/// are not inspected.
/// </summary>
/// <param name="rules">Rules to screen, checked in enumeration order.</param>
/// <returns>
/// The rejection message for the first offending allow rule, or null when every allow
/// rule passes.
/// </returns>
private static string? ValidateExecApprovalRules(IEnumerable<ExecApprovalRule> rules)
{
    foreach (var rule in rules)
    {
        if (rule.Action == ExecApprovalAction.Allow)
        {
            var rejection = CheckAllowPattern(rule.Pattern.Trim());
            if (rejection != null)
                return rejection;
        }
    }

    return null;

    // Runs the empty / broad / shell-blanket / dangerous checks, in that order, on one
    // already-trimmed allow pattern.
    static string? CheckAllowPattern(string pattern)
    {
        if (string.IsNullOrWhiteSpace(pattern))
            return "Empty allow rule patterns are not permitted.";

        var normalized = pattern.ToLowerInvariant();

        // All-wildcard spellings (*, **, ?*, "* ?", ...) match any command: once every
        // wildcard character and the surrounding whitespace are removed nothing is left.
        var literalPart = normalized.Replace("*", "").Replace("?", "").Trim();
        if (literalPart.Length == 0)
            return $"Broad allow rule is not permitted: {pattern}";

        // A bare shell name followed by "*" would allow every command in that shell
        // (e.g. "powershell *" allows every PowerShell command).
        switch (normalized)
        {
            case "powershell *":
            case "pwsh *":
            case "cmd *":
            case "cmd.exe *":
                return $"Broad allow rule is not permitted: {pattern}";
        }

        foreach (var fragment in DangerousAllowPatternFragments)
        {
            // Fragments such as "rm " end in a space that a wildcard can stand in for,
            // so "rm*" / "rm?" are rejected as well. Fragments without trailing
            // whitespace get only the plain containment check.
            var stem = fragment.TrimEnd();
            var endsWithWhitespace = stem.Length < fragment.Length;

            var isDangerous =
                normalized.Contains(fragment, StringComparison.Ordinal) ||
                (endsWithWhitespace &&
                 (normalized.Contains(stem + "*", StringComparison.Ordinal) ||
                  normalized.Contains(stem + "?", StringComparison.Ordinal)));

            if (isDangerous)
                return $"Dangerous allow rule is not permitted: {pattern}";
        }

        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the caller-supplied base hash from the request arguments, accepting either
/// the "baseHash" or the "base_hash" spelling.
/// </summary>
/// <param name="args">
/// Argument element of the request. Anything other than a JSON object yields false.
/// </param>
/// <param name="baseHash">
/// Receives the string value of the first of "baseHash" / "base_hash" that is present
/// as a JSON string; the empty string when neither is.
/// </param>
/// <returns>
/// True only when such a string was found and is not blank. A blank "baseHash" string
/// yields false without falling back to "base_hash".
/// </returns>
private static bool TryGetBaseHash(System.Text.Json.JsonElement args, out string baseHash)
{
    baseHash = "";

    // JsonElement.TryGetProperty throws InvalidOperationException on non-object values
    // (null, array, scalar); report those as "no base hash" instead of letting the
    // exception mask the real problem.
    if (args.ValueKind != System.Text.Json.JsonValueKind.Object)
        return false;

    if (!TryReadString(args, "baseHash", out var found) &&
        !TryReadString(args, "base_hash", out found))
    {
        return false;
    }

    baseHash = found;
    return !string.IsNullOrWhiteSpace(baseHash);

    // True when the named property exists and holds a JSON string.
    static bool TryReadString(System.Text.Json.JsonElement obj, string name, out string value)
    {
        if (obj.TryGetProperty(name, out var el) &&
            el.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            value = el.GetString() ?? "";
            return true;
        }

        value = "";
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Compares a caller-supplied hash with the current policy hash, ignoring case.
/// </summary>
/// <param name="candidate">Hash supplied by the caller.</param>
/// <param name="currentHash">Current policy hash.</param>
/// <returns>
/// True when the two are equal, or when <paramref name="currentHash"/> starts with
/// "sha256:" and <paramref name="candidate"/> equals the remainder after that prefix.
/// The prefix is only stripped from <paramref name="currentHash"/>, never from the
/// candidate.
/// </returns>
private static bool HashesMatch(string candidate, string currentHash)
{
    const string prefix = "sha256:";
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    return string.Equals(candidate, currentHash, ignoreCase)
        || (currentHash.StartsWith(prefix, ignoreCase)
            && string.Equals(candidate, currentHash[prefix.Length..], ignoreCase));
}
|
||||
}
|
||||
|
||||
public class SystemNotifyArgs : EventArgs
|
||||
|
||||
119
src/OpenClaw.Shared/Capabilities/TtsCapability.cs
Normal file
119
src/OpenClaw.Shared/Capabilities/TtsCapability.cs
Normal file
@ -0,0 +1,119 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// Node capability exposing the single <c>tts.speak</c> command. It validates the
/// request, then delegates the actual speech work to the <see cref="SpeakRequested"/>
/// subscriber and maps its result onto the response payload.
/// </summary>
public sealed class TtsCapability : NodeCapabilityBase
{
    /// <summary>The only command name this capability accepts.</summary>
    public const string SpeakCommand = "tts.speak";

    public const string WindowsProvider = "windows";
    public const string ElevenLabsProvider = "elevenlabs";

    /// <summary>
    /// Local neural TTS via Sherpa-ONNX wrapping Piper voices. No network
    /// egress; voice models download once to %LOCALAPPDATA%.
    /// </summary>
    public const string PiperProvider = "piper";

    /// <summary>Largest accepted length, in characters, of the trimmed "text" argument.</summary>
    public const int MaxTextLength = 5000;

    private static readonly string[] _commands = [SpeakCommand];

    public override string Category => "tts";
    public override IReadOnlyList<string> Commands => _commands;

    /// <summary>
    /// Performs the speech request. When nothing is subscribed, <c>tts.speak</c> answers
    /// with "TTS speak not available".
    /// </summary>
    public event Func<TtsSpeakArgs, CancellationToken, Task<TtsSpeakResult>>? SpeakRequested;

    public TtsCapability(IOpenClawLogger logger) : base(logger)
    {
    }

    /// <summary>
    /// Picks the provider name to use: the requested one when it is not blank, otherwise
    /// the configured one, otherwise <see cref="PiperProvider"/>.
    /// </summary>
    /// <param name="requestedProvider">Provider named in the request, if any.</param>
    /// <param name="configuredProvider">Provider from configuration, if any.</param>
    /// <returns>The chosen provider, trimmed and lower-cased (invariant culture).</returns>
    public static string ResolveProvider(string? requestedProvider, string? configuredProvider)
    {
        var provider = string.IsNullOrWhiteSpace(requestedProvider)
            ? configuredProvider
            : requestedProvider;

        return string.IsNullOrWhiteSpace(provider)
            ? PiperProvider
            : provider.Trim().ToLowerInvariant();
    }

    /// <summary>Runs the request without a cancellation token.</summary>
    public override Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
        => ExecuteAsync(request, CancellationToken.None);

    /// <summary>
    /// Validates a <c>tts.speak</c> request and forwards it to the
    /// <see cref="SpeakRequested"/> subscriber.
    /// </summary>
    /// <param name="request">Incoming request; only <see cref="SpeakCommand"/> is accepted.</param>
    /// <param name="cancellationToken">Token forwarded to the subscriber.</param>
    /// <returns>
    /// A success response with <c>spoken</c>, <c>provider</c>, <c>contentType</c> and
    /// <c>durationMs</c>; otherwise an error response for an unknown command, missing or
    /// over-long text, a missing subscriber, cancellation, or a subscriber failure.
    /// </returns>
    public override async Task<NodeInvokeResponse> ExecuteAsync(
        NodeInvokeRequest request,
        CancellationToken cancellationToken)
    {
        if (!string.Equals(request.Command, SpeakCommand, StringComparison.Ordinal))
            return Error($"Unknown command: {request.Command}");

        var text = GetStringArg(request.Args, "text")?.Trim();
        if (string.IsNullOrWhiteSpace(text))
            return Error("Missing required text");
        if (text.Length > MaxTextLength)
            return Error($"TTS text exceeds {MaxTextLength} characters.");

        // Snapshot the delegate once. Null-checking the event and reading it again at
        // the call site leaves a window in which the last subscriber can detach, which
        // would surface as a NullReferenceException reported as "Speak failed".
        var speakHandler = SpeakRequested;
        if (speakHandler == null)
            return Error("TTS speak not available");

        var args = new TtsSpeakArgs
        {
            Text = text,
            Provider = NormalizeOptional(GetStringArg(request.Args, "provider")),
            VoiceId = NormalizeOptional(GetStringArg(request.Args, "voiceId")),
            Model = NormalizeOptional(GetStringArg(request.Args, "model")),
            Interrupt = GetBoolArg(request.Args, "interrupt")
        };

        // Log the character count only, never the text itself.
        Logger.Info($"tts.speak: provider={args.Provider ?? "(default)"}, chars={args.Text.Length}, interrupt={args.Interrupt}");

        try
        {
            var result = await speakHandler(args, cancellationToken).ConfigureAwait(false);
            return Success(new
            {
                spoken = result.Spoken,
                provider = result.Provider,
                contentType = result.ContentType,
                durationMs = result.DurationMs
            });
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            return Error("Speak canceled");
        }
        catch (Exception ex)
        {
            // Privacy: never echo raw exception text into the response. The
            // exception flows through the failed-invoke path and may be
            // persisted to recent activity / support bundles. ElevenLabs
            // error messages can contain key prefixes; OS speech errors
            // can contain device names. Full detail stays in the local
            // log only. (Same pattern as SttCapability.)
            Logger.Error("TTS speak failed", ex);
            return Error("Speak failed");
        }
    }

    /// <summary>Maps null or blank input to null; otherwise returns the trimmed value.</summary>
    private static string? NormalizeOptional(string? value)
        => string.IsNullOrWhiteSpace(value) ? null : value.Trim();
}
|
||||
|
||||
/// <summary>
/// Mutable argument bag for a <c>tts.speak</c> request: the text plus optional provider,
/// voice and model selectors and an interrupt flag.
/// </summary>
public sealed class TtsSpeakArgs
{
    /// <summary>Text to speak; empty string until assigned.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Provider selector, or null when the request named none.</summary>
    public string? Provider { get; set; }

    /// <summary>Voice selector, or null when the request named none.</summary>
    public string? VoiceId { get; set; }

    /// <summary>Model selector, or null when the request named none.</summary>
    public string? Model { get; set; }

    public bool Interrupt { get; set; }
}
|
||||
|
||||
/// <summary>
/// Mutable result bag for a <c>tts.speak</c> call. A fresh instance reports
/// <see cref="Spoken"/> as true and <see cref="Provider"/> as
/// <see cref="TtsCapability.WindowsProvider"/>.
/// </summary>
public sealed class TtsSpeakResult
{
    /// <summary>Spoken flag; true unless changed.</summary>
    public bool Spoken { get; set; } = true;

    /// <summary>Provider name; <see cref="TtsCapability.WindowsProvider"/> unless changed.</summary>
    public string Provider { get; set; } = TtsCapability.WindowsProvider;

    /// <summary>Content type value, or null when not set.</summary>
    public string? ContentType { get; set; }

    /// <summary>Duration in milliseconds, or null when not set.</summary>
    public int? DurationMs { get; set; }
}
|
||||
@ -20,10 +20,13 @@ public static class DeepLinkParser
|
||||
if (!uri.StartsWith(Scheme, StringComparison.OrdinalIgnoreCase))
|
||||
return null;
|
||||
|
||||
var remainder = uri[Scheme.Length..].TrimEnd('/');
|
||||
var remainder = uri[Scheme.Length..];
|
||||
var queryIndex = remainder.IndexOf('?');
|
||||
var query = queryIndex >= 0 ? remainder[(queryIndex + 1)..] : "";
|
||||
var path = queryIndex >= 0 ? remainder[..queryIndex] : remainder;
|
||||
// Trim trailing slash AFTER splitting off the query so the
|
||||
// Windows-canonicalized form `openclaw://send/?args=...` (slash
|
||||
// BEFORE the `?`) yields path "send", not "send/".
|
||||
var path = (queryIndex >= 0 ? remainder[..queryIndex] : remainder).TrimEnd('/');
|
||||
|
||||
var parameters = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var part in query.Split('&', StringSplitOptions.RemoveEmptyEntries))
|
||||
|
||||
@ -1,8 +1,11 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using OpenClaw.Shared.Mcp;
|
||||
using NSec.Cryptography;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
@ -18,12 +21,66 @@ public class DeviceIdentity
|
||||
private PublicKey? _publicKey;
|
||||
private string? _deviceId;
|
||||
private string? _deviceToken;
|
||||
private string[]? _deviceTokenScopes;
|
||||
private string? _nodeDeviceToken;
|
||||
private string[]? _nodeDeviceTokenScopes;
|
||||
|
||||
private static readonly SignatureAlgorithm Ed25519Algorithm = SignatureAlgorithm.Ed25519;
|
||||
|
||||
public string DeviceId => _deviceId ?? throw new InvalidOperationException("Device not initialized");
|
||||
public string PublicKeyBase64Url => _publicKey != null ? Base64UrlEncode(_publicKey.Export(KeyBlobFormat.RawPublicKey)) : throw new InvalidOperationException("Device not initialized");
|
||||
public string? DeviceToken => _deviceToken;
|
||||
public IReadOnlyList<string>? DeviceTokenScopes => _deviceTokenScopes;
|
||||
public string? NodeDeviceToken => _nodeDeviceToken;
|
||||
public IReadOnlyList<string>? NodeDeviceTokenScopes => _nodeDeviceTokenScopes;
|
||||
|
||||
/// <summary>
/// Reads the persisted operator-role device token from the key file under
/// <paramref name="dataPath"/>. Shorthand for
/// <see cref="TryReadStoredDeviceTokenForRole"/> with role <c>"operator"</c>.
/// </summary>
/// <param name="dataPath">Directory that holds the device key file.</param>
/// <param name="logger">Optional logger for read/parse warnings.</param>
/// <returns>The stored token, or <c>null</c> when none is available.</returns>
public static string? TryReadStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null)
{
    return TryReadStoredDeviceTokenForRole(dataPath, "operator", logger);
}
|
||||
|
||||
/// <summary>
/// Reads the persisted device token for <paramref name="role"/> straight from
/// <c>device-key-ed25519.json</c> under <paramref name="dataPath"/>, without
/// constructing a <see cref="DeviceIdentity"/>.
/// </summary>
/// <param name="dataPath">Directory expected to contain the key file.</param>
/// <param name="role"><c>"operator"</c> or <c>"node"</c>.</param>
/// <param name="logger">Optional logger; receives a warning when the file cannot be read or parsed.</param>
/// <returns>
/// The stored token, or <c>null</c> when the file or property is missing, the value is
/// not a non-blank JSON string, or an I/O, access, or JSON error occurs.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="role"/> is not a known role.</exception>
public static string? TryReadStoredDeviceTokenForRole(string dataPath, string role, IOpenClawLogger? logger = null)
{
    // Parse the role before touching the disk so an unknown role throws even when
    // no key file exists.
    var parsedRole = ParseDeviceTokenRole(role);

    var keyFile = Path.Combine(dataPath, "device-key-ed25519.json");
    if (!File.Exists(keyFile))
    {
        return null;
    }

    try
    {
        using var document = JsonDocument.Parse(File.ReadAllText(keyFile));

        // The JSON property is named after the matching DeviceKeyData member.
        string propertyName;
        if (parsedRole == DeviceTokenRole.Node)
        {
            propertyName = nameof(DeviceKeyData.NodeDeviceToken);
        }
        else
        {
            propertyName = nameof(DeviceKeyData.DeviceToken);
        }

        var found = document.RootElement.TryGetProperty(propertyName, out var tokenElement);
        if (found && tokenElement.ValueKind == JsonValueKind.String)
        {
            var token = tokenElement.GetString();
            return string.IsNullOrWhiteSpace(token) ? null : token;
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or JsonException)
    {
        // Expected failure modes are logged and reported as "no token".
        logger?.Warn($"Failed to read stored device token: {ex.Message}");
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Reports whether a non-blank operator-role device token is persisted under
/// <paramref name="dataPath"/>.
/// </summary>
/// <param name="dataPath">Directory that holds the device key file.</param>
/// <param name="logger">Optional logger passed through to the read helper.</param>
/// <returns><c>true</c> when <see cref="TryReadStoredDeviceToken"/> yields a non-blank token.</returns>
public static bool HasStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null)
{
    var storedToken = TryReadStoredDeviceToken(dataPath, logger);
    return !string.IsNullOrWhiteSpace(storedToken);
}
|
||||
|
||||
/// <summary>
/// Reports whether a non-blank device token for <paramref name="role"/> is persisted
/// under <paramref name="dataPath"/>.
/// </summary>
/// <param name="dataPath">Directory that holds the device key file.</param>
/// <param name="role"><c>"operator"</c> or <c>"node"</c>.</param>
/// <param name="logger">Optional logger passed through to the read helper.</param>
/// <returns><c>true</c> when <see cref="TryReadStoredDeviceTokenForRole"/> yields a non-blank token.</returns>
public static bool HasStoredDeviceTokenForRole(string dataPath, string role, IOpenClawLogger? logger = null)
{
    var storedToken = TryReadStoredDeviceTokenForRole(dataPath, role, logger);
    return !string.IsNullOrWhiteSpace(storedToken);
}
|
||||
|
||||
public DeviceIdentity(string dataPath, IOpenClawLogger? logger = null)
|
||||
{
|
||||
@ -65,8 +122,11 @@ public class DeviceIdentity
|
||||
_publicKey = _privateKey.PublicKey;
|
||||
_deviceId = data.DeviceId;
|
||||
_deviceToken = data.DeviceToken;
|
||||
_deviceTokenScopes = NormalizeScopes(data.DeviceTokenScopes);
|
||||
_nodeDeviceToken = data.NodeDeviceToken;
|
||||
_nodeDeviceTokenScopes = NormalizeScopes(data.NodeDeviceTokenScopes);
|
||||
|
||||
_logger.Info($"Loaded Ed25519 device identity: {_deviceId?.Substring(0, 16)}...");
|
||||
_logger.Info($"Loaded Ed25519 device identity: {_deviceId?[..16]}...");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@ -109,8 +169,11 @@ public class DeviceIdentity
|
||||
{
|
||||
Directory.CreateDirectory(dir);
|
||||
}
|
||||
if (!string.IsNullOrEmpty(dir))
|
||||
McpAuthToken.TryRestrictDataDirectoryAcl(dir);
|
||||
|
||||
File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));
|
||||
McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
|
||||
_logger.Info($"Generated new Ed25519 device identity: {_deviceId}");
|
||||
}
|
||||
|
||||
@ -134,6 +197,118 @@ public class DeviceIdentity
|
||||
// Return base64url encoded signature
|
||||
return Base64UrlEncode(signature);
|
||||
}
|
||||
|
||||
/// <summary>
/// Signs a v3 connect payload for operator/client connections with the device's Ed25519 key.
/// Format: v3|{deviceId}|{clientId}|{clientMode}|{role}|{scopesCsv}|{signedAtMs}|{tokenOrEmpty}|{nonce}|{platform}|{deviceFamily}
/// </summary>
/// <returns>The base64url-encoded signature over the UTF-8 bytes of the payload.</returns>
/// <exception cref="InvalidOperationException">Thrown when the device key has not been initialized.</exception>
public string SignConnectPayloadV3(
    string nonce,
    long signedAtMs,
    string clientId,
    string clientMode,
    string role,
    IEnumerable<string> scopes,
    string authToken,
    string platform,
    string deviceFamily)
{
    if (_privateKey == null)
    {
        throw new InvalidOperationException("Device not initialized");
    }

    // The text that is signed is exactly what BuildConnectPayloadV3 produces.
    var payloadText = BuildConnectPayloadV3(
        nonce, signedAtMs, clientId, clientMode, role, scopes, authToken, platform, deviceFamily);

    var payloadBytes = Encoding.UTF8.GetBytes(payloadText);
    return Base64UrlEncode(Ed25519Algorithm.Sign(_privateKey, payloadBytes));
}
|
||||
|
||||
/// <summary>
/// Builds the v3 connect payload string for signing/debugging.
/// Format: v3|{deviceId}|{clientId}|{clientMode}|{role}|{scopesCsv}|{signedAtMs}|{tokenOrEmpty}|{nonce}|{platform}|{deviceFamily}
/// </summary>
/// <remarks>
/// A null <paramref name="scopes"/> contributes an empty CSV; a null
/// <paramref name="authToken"/> or <paramref name="nonce"/> contributes an empty field.
/// </remarks>
/// <exception cref="InvalidOperationException">Thrown when the device id has not been initialized.</exception>
public string BuildConnectPayloadV3(
    string nonce,
    long signedAtMs,
    string clientId,
    string clientMode,
    string role,
    IEnumerable<string> scopes,
    string authToken,
    string platform,
    string deviceFamily)
{
    if (_deviceId == null)
    {
        throw new InvalidOperationException("Device not initialized");
    }

    var scopeCsv = string.Join(",", scopes ?? Array.Empty<string>());

    // Field order is the wire format that gets signed — keep it in sync with the summary.
    return string.Join(
        "|",
        "v3",
        _deviceId,
        clientId,
        clientMode,
        role,
        scopeCsv,
        signedAtMs.ToString(),
        authToken ?? string.Empty,
        nonce ?? string.Empty,
        platform,
        deviceFamily);
}
|
||||
|
||||
/// <summary>
/// Signs a v2 connect payload (compatibility mode) with the device's Ed25519 key.
/// Format: v2|{deviceId}|{clientId}|{clientMode}|{role}|{scopesCsv}|{signedAtMs}|{tokenOrEmpty}|{nonce}
/// </summary>
/// <returns>The base64url-encoded signature over the UTF-8 bytes of the payload.</returns>
/// <exception cref="InvalidOperationException">Thrown when the device key has not been initialized.</exception>
public string SignConnectPayloadV2(
    string nonce,
    long signedAtMs,
    string clientId,
    string clientMode,
    string role,
    IEnumerable<string> scopes,
    string authToken)
{
    if (_privateKey == null)
    {
        throw new InvalidOperationException("Device not initialized");
    }

    // The text that is signed is exactly what BuildConnectPayloadV2 produces.
    var payloadText = BuildConnectPayloadV2(
        nonce, signedAtMs, clientId, clientMode, role, scopes, authToken);

    var payloadBytes = Encoding.UTF8.GetBytes(payloadText);
    return Base64UrlEncode(Ed25519Algorithm.Sign(_privateKey, payloadBytes));
}
|
||||
|
||||
/// <summary>
/// Builds the v2 connect payload string for signing/debugging.
/// Format: v2|{deviceId}|{clientId}|{clientMode}|{role}|{scopesCsv}|{signedAtMs}|{tokenOrEmpty}|{nonce}
/// </summary>
/// <remarks>
/// A null <paramref name="scopes"/> contributes an empty CSV; a null
/// <paramref name="authToken"/> or <paramref name="nonce"/> contributes an empty field.
/// </remarks>
/// <exception cref="InvalidOperationException">Thrown when the device id has not been initialized.</exception>
public string BuildConnectPayloadV2(
    string nonce,
    long signedAtMs,
    string clientId,
    string clientMode,
    string role,
    IEnumerable<string> scopes,
    string authToken)
{
    if (_deviceId == null)
    {
        throw new InvalidOperationException("Device not initialized");
    }

    var scopeCsv = string.Join(",", scopes ?? Array.Empty<string>());

    // Field order is the wire format that gets signed — keep it in sync with the summary.
    return string.Join(
        "|",
        "v2",
        _deviceId,
        clientId,
        clientMode,
        role,
        scopeCsv,
        signedAtMs.ToString(),
        authToken ?? string.Empty,
        nonce ?? string.Empty);
}
|
||||
|
||||
/// <summary>
|
||||
/// Build the payload string (for debugging)
|
||||
@ -158,7 +333,40 @@ public class DeviceIdentity
|
||||
/// </summary>
|
||||
public void StoreDeviceToken(string token) =>
    // Operator-role token without scope information.
    StoreDeviceTokenCore(token, null);
|
||||
|
||||
/// <summary>
/// Stores the operator-role device token together with its scopes. Scopes are
/// normalized (trimmed, blank entries dropped, de-duplicated) before being stored.
/// </summary>
/// <param name="token">Token to store; must not be blank.</param>
/// <param name="scopes">Scopes granted with the token, or <c>null</c>.</param>
public void StoreDeviceTokenWithScopes(string token, IEnumerable<string>? scopes)
{
    var normalizedScopes = NormalizeScopes(scopes);
    StoreDeviceTokenCore(token, normalizedScopes);
}
|
||||
|
||||
/// <summary>
/// Stores a device token for the given role, routing to the node-token or
/// operator-token store. Scopes are normalized before being stored.
/// </summary>
/// <param name="role"><c>"operator"</c> or <c>"node"</c>.</param>
/// <param name="token">Token to store; must not be blank.</param>
/// <param name="scopes">Scopes granted with the token, or <c>null</c>.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="role"/> is not a known role.</exception>
public void StoreDeviceTokenForRole(string role, string token, IEnumerable<string>? scopes = null)
{
    // Role is validated first so an unknown role throws before scopes are enumerated.
    var parsedRole = ParseDeviceTokenRole(role);
    var normalizedScopes = NormalizeScopes(scopes);

    if (parsedRole == DeviceTokenRole.Node)
    {
        StoreNodeDeviceTokenCore(token, normalizedScopes);
    }
    else
    {
        StoreDeviceTokenCore(token, normalizedScopes);
    }
}
|
||||
|
||||
/// <summary>
/// Maps a role name to <see cref="DeviceTokenRole"/>. Matching is exact and
/// case-sensitive.
/// </summary>
/// <param name="role"><c>"operator"</c> or <c>"node"</c>.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown for any other value, including <c>null</c>.</exception>
private static DeviceTokenRole ParseDeviceTokenRole(string role)
{
    switch (role)
    {
        case "operator":
            return DeviceTokenRole.Operator;
        case "node":
            return DeviceTokenRole.Node;
        default:
            throw new ArgumentOutOfRangeException(nameof(role), "Device token role must be 'operator' or 'node'.");
    }
}
|
||||
|
||||
private void StoreDeviceTokenCore(string token, string[]? scopes)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(token))
|
||||
throw new ArgumentException("Device token cannot be empty.", nameof(token));
|
||||
|
||||
_deviceToken = token;
|
||||
_deviceTokenScopes = scopes;
|
||||
|
||||
// Update the key file with the token
|
||||
try
|
||||
@ -170,7 +378,9 @@ public class DeviceIdentity
|
||||
if (data != null)
|
||||
{
|
||||
data.DeviceToken = token;
|
||||
data.DeviceTokenScopes = scopes;
|
||||
File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));
|
||||
McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
|
||||
_logger.Info("Device token stored");
|
||||
}
|
||||
}
|
||||
@ -180,6 +390,48 @@ public class DeviceIdentity
|
||||
_logger.Error($"Failed to store device token: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores the node-role device token and its scopes in memory and, when the key file
/// exists and deserializes, rewrites that file with the new values.
/// </summary>
/// <param name="token">Token to store; must not be null, empty, or whitespace.</param>
/// <param name="scopes">Scopes to store alongside the token, or <c>null</c>.</param>
/// <exception cref="ArgumentException">Thrown when <paramref name="token"/> is blank.</exception>
private void StoreNodeDeviceTokenCore(string token, string[]? scopes)
{
    if (string.IsNullOrWhiteSpace(token))
        throw new ArgumentException("Device token cannot be empty.", nameof(token));

    _nodeDeviceToken = token;
    _nodeDeviceTokenScopes = scopes;

    try
    {
        if (File.Exists(_keyPath))
        {
            var json = File.ReadAllText(_keyPath);
            var data = JsonSerializer.Deserialize<DeviceKeyData>(json);
            if (data != null)
            {
                data.NodeDeviceToken = token;
                data.NodeDeviceTokenScopes = scopes;
                File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));
                // The key file carries the private key and tokens; re-apply the restrictive
                // ACL after every rewrite, exactly as the operator-token store path does.
                McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
                _logger.Info("Node device token stored");
            }
        }
    }
    catch (Exception ex)
    {
        // Persistence is best-effort: the in-memory token stays set even if the write fails.
        _logger.Error($"Failed to store node device token: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Canonicalizes a scope list: blank entries are dropped, the rest are trimmed, and
/// duplicates (ordinal, case-sensitive) are removed keeping first-seen order.
/// </summary>
/// <param name="scopes">Raw scopes, or <c>null</c>.</param>
/// <returns>The normalized scopes, or <c>null</c> when the input is null or nothing survives.</returns>
private static string[]? NormalizeScopes(IEnumerable<string>? scopes)
{
    if (scopes == null)
    {
        return null;
    }

    var seen = new HashSet<string>(StringComparer.Ordinal);
    var ordered = new List<string>();
    foreach (var candidate in scopes)
    {
        if (string.IsNullOrWhiteSpace(candidate))
        {
            continue;
        }

        var trimmed = candidate.Trim();
        if (seen.Add(trimmed))
        {
            ordered.Add(trimmed);
        }
    }

    return ordered.Count > 0 ? ordered.ToArray() : null;
}
|
||||
|
||||
private static string Base64UrlEncode(byte[] data)
|
||||
{
|
||||
@ -189,12 +441,21 @@ public class DeviceIdentity
|
||||
.TrimEnd('=');
|
||||
}
|
||||
|
||||
/// <summary>Roles a stored device token can belong to.</summary>
private enum DeviceTokenRole
{
    /// <summary>Selected by the role name <c>"operator"</c>.</summary>
    Operator = 0,

    /// <summary>Selected by the role name <c>"node"</c>.</summary>
    Node = 1
}
|
||||
|
||||
// On-disk shape of the device key file; (de)serialized with System.Text.Json.
// Property names double as JSON property names (see the nameof(...) lookups in
// TryReadStoredDeviceTokenForRole), so renaming a member changes the file format.
private class DeviceKeyData
|
||||
{
|
||||
public string? PrivateKeyBase64 { get; set; }
|
||||
public string? PublicKeyBase64 { get; set; }
|
||||
public string? DeviceId { get; set; }
|
||||
// Operator-role token and its scopes.
public string? DeviceToken { get; set; }
|
||||
public string[]? DeviceTokenScopes { get; set; }
|
||||
// Node-role token and its scopes.
public string? NodeDeviceToken { get; set; }
|
||||
public string[]? NodeDeviceTokenScopes { get; set; }
|
||||
public string? Algorithm { get; set; }
|
||||
// NOTE(review): unit/epoch of CreatedAt is not visible here — confirm at the write site.
public long CreatedAt { get; set; }
|
||||
}
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.RegularExpressions;
|
||||
@ -34,7 +36,7 @@ public enum ExecApprovalAction
|
||||
{
|
||||
Allow,
|
||||
Deny,
|
||||
Prompt // Future: show user a confirmation dialog
|
||||
Prompt
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -60,8 +62,10 @@ public class ExecApprovalPolicy
|
||||
private List<ExecApprovalRule> _rules = new();
|
||||
private ExecApprovalAction _defaultAction = ExecApprovalAction.Deny;
|
||||
|
||||
// Compiled regex cache for patterns
|
||||
private readonly Dictionary<string, Regex> _regexCache = new();
|
||||
// Compiled regex cache — ConcurrentDictionary for thread safety.
|
||||
// Pattern → compiled Regex mapping never changes for a given pattern string
|
||||
// (glob-to-regex conversion is deterministic), so no cache invalidation is needed.
|
||||
private static readonly ConcurrentDictionary<string, Regex> _regexCache = new(StringComparer.Ordinal);
|
||||
|
||||
/// <summary>Current rules (read-only view)</summary>
|
||||
public IReadOnlyList<ExecApprovalRule> Rules => _rules.AsReadOnly();
|
||||
@ -95,8 +99,7 @@ public class ExecApprovalPolicy
|
||||
};
|
||||
}
|
||||
|
||||
// Compute once; only used if any rule has shell filters.
|
||||
var normalizedShell = (shell ?? "powershell").ToLowerInvariant();
|
||||
var shellSpan = (shell ?? "powershell").AsSpan();
|
||||
|
||||
foreach (var rule in _rules)
|
||||
{
|
||||
@ -105,8 +108,16 @@ public class ExecApprovalPolicy
|
||||
// Check shell filter
|
||||
if (rule.Shells is { Length: > 0 })
|
||||
{
|
||||
if (!rule.Shells.Any(s => s.Equals(normalizedShell, StringComparison.OrdinalIgnoreCase)))
|
||||
continue;
|
||||
var shellMatched = false;
|
||||
foreach (var s in rule.Shells)
|
||||
{
|
||||
if (s.AsSpan().Equals(shellSpan, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
shellMatched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!shellMatched) continue;
|
||||
}
|
||||
|
||||
// Check pattern match
|
||||
@ -143,7 +154,6 @@ public class ExecApprovalPolicy
|
||||
public void AddRule(ExecApprovalRule rule)
|
||||
{
|
||||
_rules.Add(rule);
|
||||
ClearRegexCache();
|
||||
Save();
|
||||
}
|
||||
|
||||
@ -154,7 +164,6 @@ public class ExecApprovalPolicy
|
||||
{
|
||||
index = Math.Clamp(index, 0, _rules.Count);
|
||||
_rules.Insert(index, rule);
|
||||
ClearRegexCache();
|
||||
Save();
|
||||
}
|
||||
|
||||
@ -165,7 +174,6 @@ public class ExecApprovalPolicy
|
||||
{
|
||||
if (index < 0 || index >= _rules.Count) return false;
|
||||
_rules.RemoveAt(index);
|
||||
ClearRegexCache();
|
||||
Save();
|
||||
return true;
|
||||
}
|
||||
@ -177,7 +185,6 @@ public class ExecApprovalPolicy
|
||||
{
|
||||
_rules = new List<ExecApprovalRule>(rules);
|
||||
if (defaultAction.HasValue) _defaultAction = defaultAction.Value;
|
||||
ClearRegexCache();
|
||||
Save();
|
||||
}
|
||||
|
||||
@ -192,6 +199,13 @@ public class ExecApprovalPolicy
|
||||
Rules = _rules.ToList()
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Computes a fingerprint of the current policy: the SHA-256 of the policy data
/// serialized to JSON (UTF-8), rendered as lowercase hex with a <c>sha256:</c> prefix.
/// </summary>
/// <returns>A string of the form <c>sha256:&lt;hex digest&gt;</c>.</returns>
public string GetPolicyHash()
{
    var serialized = JsonSerializer.Serialize(GetPolicyData(), _jsonOptions);
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(serialized));
    var hex = Convert.ToHexString(digest).ToLowerInvariant();
    return "sha256:" + hex;
}
|
||||
|
||||
/// <summary>
|
||||
/// Load policy from disk. Creates default policy if file doesn't exist.
|
||||
@ -209,7 +223,6 @@ public class ExecApprovalPolicy
|
||||
_rules = data.Rules ?? new List<ExecApprovalRule>();
|
||||
_defaultAction = data.DefaultAction;
|
||||
_logger.Info($"[EXEC-POLICY] Loaded {_rules.Count} rules from {_policyFilePath}");
|
||||
ClearRegexCache();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -236,14 +249,8 @@ public class ExecApprovalPolicy
|
||||
var dir = Path.GetDirectoryName(_policyFilePath);
|
||||
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
|
||||
Directory.CreateDirectory(dir);
|
||||
|
||||
var data = new ExecPolicyData
|
||||
{
|
||||
DefaultAction = _defaultAction,
|
||||
Rules = _rules
|
||||
};
|
||||
|
||||
var json = JsonSerializer.Serialize(data, _jsonOptions);
|
||||
|
||||
var json = JsonSerializer.Serialize(GetPolicyData(), _jsonOptions);
|
||||
File.WriteAllText(_policyFilePath, json);
|
||||
}
|
||||
catch (Exception ex)
|
||||
@ -285,28 +292,32 @@ public class ExecApprovalPolicy
|
||||
|
||||
/// <summary>
/// Glob-style pattern matching: * matches any chars, ? matches single char.
/// Case-insensitive. Returns false on regex timeout (guards against ReDoS in
/// user-supplied policy files); the timeout is logged as a deny-by-default outcome.
/// </summary>
/// <param name="command">Command text to test.</param>
/// <param name="pattern">Glob pattern; the literal <c>"*"</c> matches everything without a regex.</param>
/// <returns><c>true</c> when the whole command matches the pattern; <c>false</c> otherwise or on timeout.</returns>
internal bool MatchesPattern(string command, string pattern)
{
    if (pattern == "*") return true;

    // The glob-to-regex translation depends only on the pattern text, so the compiled
    // regex is cached per pattern. The factory is static: it must use its own
    // parameter rather than capture the outer 'pattern'.
    var regex = _regexCache.GetOrAdd(pattern, static p =>
    {
        // Convert glob to regex: escape everything, then re-enable the two wildcards.
        var regexPattern = "^" + Regex.Escape(p)
            .Replace("\\*", ".*")
            .Replace("\\?", ".") + "$";

        return new Regex(regexPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled, TimeSpan.FromMilliseconds(100));
    });

    try
    {
        return regex.IsMatch(command);
    }
    catch (RegexMatchTimeoutException)
    {
        _logger.Warn($"[EXEC-POLICY] Pattern match timed out for '{pattern}'; denying as safe default");
        return false;
    }
}
|
||||
|
||||
private void ClearRegexCache() => _regexCache.Clear();
|
||||
|
||||
|
||||
private static readonly JsonSerializerOptions _jsonOptions = new()
|
||||
{
|
||||
WriteIndented = true,
|
||||
|
||||
40
src/OpenClaw.Shared/ExecApprovalPrompt.cs
Normal file
40
src/OpenClaw.Shared/ExecApprovalPrompt.cs
Normal file
@ -0,0 +1,40 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>Possible answers to an exec approval prompt.</summary>
public enum ExecApprovalPromptDecisionKind
{
    /// <summary>The request was denied.</summary>
    Deny = 0,

    /// <summary>The request was allowed once.</summary>
    AllowOnce = 1,

    /// <summary>The request was always-allowed.</summary>
    AlwaysAllow = 2
}
|
||||
|
||||
/// <summary>
/// Immutable description of a command for which an approval prompt is requested.
/// </summary>
public sealed class ExecApprovalPromptRequest
{
    /// <summary>Command text; defaults to the empty string.</summary>
    public string Command { get; init; } = string.Empty;

    /// <summary>Shell associated with the command, or <c>null</c> when unspecified.</summary>
    public string? Shell { get; init; }

    /// <summary>Policy pattern associated with the request, or <c>null</c>.</summary>
    public string? MatchedPattern { get; init; }

    /// <summary>Reason text for the prompt; defaults to the empty string.</summary>
    public string Reason { get; init; } = string.Empty;
}
|
||||
|
||||
/// <summary>
/// Immutable answer to an approval prompt: a decision kind plus a human-readable reason.
/// Instances are created through the static factory methods only.
/// </summary>
public sealed class ExecApprovalPromptDecision
{
    private ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind kind, string reason) =>
        (Kind, Reason) = (kind, reason);

    /// <summary>The decision that was made.</summary>
    public ExecApprovalPromptDecisionKind Kind { get; }

    /// <summary>Human-readable explanation attached to the decision.</summary>
    public string Reason { get; }

    /// <summary>Creates a <see cref="ExecApprovalPromptDecisionKind.Deny"/> decision.</summary>
    public static ExecApprovalPromptDecision Deny(string reason = "Denied by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.Deny, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalPromptDecisionKind.AllowOnce"/> decision.</summary>
    public static ExecApprovalPromptDecision AllowOnce(string reason = "Allowed once by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.AllowOnce, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalPromptDecisionKind.AlwaysAllow"/> decision.</summary>
    public static ExecApprovalPromptDecision AlwaysAllow(string reason = "Always allowed by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.AlwaysAllow, reason);
    }
}
|
||||
|
||||
/// <summary>
/// Abstraction over whatever produces a decision for an exec approval prompt request.
/// </summary>
public interface IExecApprovalPromptHandler
|
||||
{
|
||||
/// <summary>Asynchronously obtains a decision for <paramref name="request"/>.</summary>
/// <param name="request">The command being asked about.</param>
/// <param name="cancellationToken">Token for cancelling the pending request.</param>
/// <returns>A task that yields the resulting <see cref="ExecApprovalPromptDecision"/>.</returns>
Task<ExecApprovalPromptDecision> RequestAsync(ExecApprovalPromptRequest request, CancellationToken cancellationToken = default);
|
||||
}
|
||||
@ -0,0 +1,70 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Architectural barrier produced by PR3: the single canonical representation of a
/// command, reused across evaluation, logging, prompting and execution (rail 15).
/// </summary>
/// <remarks>
/// Equivalent to ExecHostValidatedRequest in the macOS reference, extended with resolution outputs.
/// No module from PR4 onward may accept ValidatedRunRequest as direct input (research doc 05 line 439).
/// </remarks>
public sealed class CanonicalCommandIdentity
{
    internal CanonicalCommandIdentity(
        IReadOnlyList<string> command,
        string displayCommand,
        string? evaluationRawCommand,
        ExecCommandResolution? resolution,
        IReadOnlyList<ExecCommandResolution> allowlistResolutions,
        IReadOnlyList<string> allowAlwaysPatterns,
        string? cwd,
        int timeoutMs,
        IReadOnlyDictionary<string, string>? env,
        string? agentId,
        string? sessionKey)
    {
        // Values are stored as given; no copying or validation happens here.

        // Normalization outputs
        Command = command;
        DisplayCommand = displayCommand;
        EvaluationRawCommand = evaluationRawCommand;

        // Resolution outputs
        Resolution = resolution;
        AllowlistResolutions = allowlistResolutions;
        AllowAlwaysPatterns = allowAlwaysPatterns;

        // Request context
        Cwd = cwd;
        TimeoutMs = timeoutMs;
        Env = env;
        AgentId = agentId;
        SessionKey = sessionKey;
    }

    // ── Normalization outputs ─────────────────────────────────────────────────

    /// <summary>
    /// Argv exactly as produced by PR2 (no trimming; coding contract process-argv-semantics).
    /// </summary>
    public IReadOnlyList<string> Command { get; }

    /// <summary>
    /// Canonical display form generated from argv. Never rawCommand from the agent.
    /// Used by logging and prompting. Research doc 05 decision 2.
    /// </summary>
    public string DisplayCommand { get; }

    /// <summary>
    /// Safe rawCommand for executable resolution. Null in Windows v1 (rawCommand not in
    /// system.run protocol; research doc 05 OQ-V4 / decision 10).
    /// </summary>
    public string? EvaluationRawCommand { get; }

    // ── Resolution outputs ────────────────────────────────────────────────────

    /// <summary>
    /// Singular resolution for the state machine (PR5).
    /// Null if the primary executable cannot be determined.
    /// </summary>
    public ExecCommandResolution? Resolution { get; }

    /// <summary>
    /// Per-segment resolutions for the allowlist matcher (PR4/PR5).
    /// Empty list means fail-closed — no allowlist satisfaction possible.
    /// </summary>
    public IReadOnlyList<ExecCommandResolution> AllowlistResolutions { get; }

    /// <summary>
    /// Suggested allowlist patterns for prompt/UI (PR6). Not a security decision.
    /// </summary>
    public IReadOnlyList<string> AllowAlwaysPatterns { get; }

    // ── Request context (carried from ValidatedRunRequest) ────────────────────

    /// <summary>Working directory from the request, or <c>null</c>.</summary>
    public string? Cwd { get; }

    /// <summary>Timeout from the request, in milliseconds.</summary>
    public int TimeoutMs { get; }

    /// <summary>Environment variables from the request, or <c>null</c>.</summary>
    public IReadOnlyDictionary<string, string>? Env { get; }

    /// <summary>Agent id from the request, or <c>null</c>.</summary>
    public string? AgentId { get; }

    /// <summary>Session key from the request, or <c>null</c>.</summary>
    public string? SessionKey { get; }
}
|
||||
@ -0,0 +1,137 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Phase 1 of the V2 exec approval pipeline: structural input validation (rail 18, step 1).
/// Parses a raw NodeInvokeRequest into a ValidatedRunRequest or returns validation-failed.
/// Does not resolve executables, detect shell wrappers, or evaluate policy.
/// </summary>
public static class ExecApprovalV2InputValidator
{
    private const int DefaultTimeoutMs = 30_000;

    /// <summary>
    /// Validates the request arguments. Checks run in a fixed order — command, cwd, env,
    /// timeout — and the first failure determines the denial reason.
    /// </summary>
    /// <param name="request">Raw invoke request whose <c>Args</c> JSON is inspected.</param>
    /// <returns>An Ok outcome wrapping the validated request, or a Fail outcome with a reason code.</returns>
    public static ExecApprovalV2ValidationOutcome Validate(NodeInvokeRequest request)
    {
        var args = request.Args;

        var argv = TryParseArgv(args, out bool malformedCommand);
        if (malformedCommand)
        {
            return Deny("malformed-command");
        }

        if (argv is null || argv.Length == 0)
        {
            return Deny("missing-command");
        }

        if (string.IsNullOrWhiteSpace(argv[0]))
        {
            return Deny("empty-command");
        }

        if (ReadCwd(args, out var cwd) is { } cwdFailure)
        {
            return Deny(cwdFailure);
        }

        if (ReadEnv(args, out var env) is { } envFailure)
        {
            return Deny(envFailure);
        }

        if (ReadTimeout(args, out var timeoutMs) is { } timeoutFailure)
        {
            return Deny(timeoutFailure);
        }

        var validated = new ValidatedRunRequest(
            argv,
            TryGetString(args, "shell"),
            cwd,
            timeoutMs,
            env,
            TryGetString(args, "agentId"),
            TryGetString(args, "sessionKey"));

        return ExecApprovalV2ValidationOutcome.Ok(validated);
    }

    /// <summary>Wraps a reason code in a validation-failed outcome.</summary>
    private static ExecApprovalV2ValidationOutcome Deny(string reason)
    {
        return ExecApprovalV2ValidationOutcome.Fail(ExecApprovalV2Result.ValidationFailed(reason));
    }

    /// <summary>
    /// Reads "cwd": optional, but empty/whitespace is a caller error and a wrong type is a
    /// protocol violation. Returns the denial reason, or <c>null</c> on success.
    /// </summary>
    private static string? ReadCwd(JsonElement args, out string? cwd)
    {
        cwd = null;
        if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("cwd", out var element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.String)
        {
            return "malformed-cwd";
        }

        var raw = element.GetString();
        if (string.IsNullOrWhiteSpace(raw))
        {
            return "empty-cwd";
        }

        cwd = raw;
        return null;
    }

    /// <summary>
    /// Reads "env": must be a JSON object if present, and every value must be a string.
    /// Keys are compared case-insensitively; a later duplicate overwrites an earlier one.
    /// Returns the denial reason, or <c>null</c> on success.
    /// </summary>
    private static string? ReadEnv(JsonElement args, out IReadOnlyDictionary<string, string>? env)
    {
        env = null;
        if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("env", out var element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.Object)
        {
            return "malformed-env";
        }

        var variables = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (var entry in element.EnumerateObject())
        {
            if (entry.Value.ValueKind != JsonValueKind.String)
            {
                return "malformed-env";
            }

            variables[entry.Name] = entry.Value.GetString() ?? "";
        }

        env = variables;
        return null;
    }

    /// <summary>
    /// Reads "timeoutMs" (preferred) or "timeout": a positive 32-bit integer, defaulting to
    /// 30 000. "timeout" is consulted only when "timeoutMs" is absent.
    /// Upper-bound clamping (legacy safety limit) is enforced in the execution/policy phase, not here.
    /// Returns the denial reason, or <c>null</c> on success.
    /// </summary>
    private static string? ReadTimeout(JsonElement args, out int timeoutMs)
    {
        timeoutMs = DefaultTimeoutMs;
        if (args.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        if (!args.TryGetProperty("timeoutMs", out var element) &&
            !args.TryGetProperty("timeout", out element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.Number || !element.TryGetInt32(out var parsed) || parsed <= 0)
        {
            return "invalid-timeout";
        }

        timeoutMs = parsed;
        return null;
    }

    /// <summary>
    /// Extracts argv from "command", which may be an array of strings or a bare string
    /// optionally accompanied by a separate "args" array.
    /// </summary>
    /// <param name="args">The request arguments.</param>
    /// <param name="malformed">Set to <c>true</c> when a present value has the wrong JSON type.</param>
    /// <returns>The argv, or <c>null</c> when the command is absent, blank, empty, or malformed.</returns>
    private static string[]? TryParseArgv(JsonElement args, out bool malformed)
    {
        malformed = false;
        if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("command", out var commandElement))
        {
            return null;
        }

        switch (commandElement.ValueKind)
        {
            case JsonValueKind.Array:
            {
                var parts = new List<string>();
                if (!TryAppendStrings(commandElement, parts))
                {
                    malformed = true;
                    return null;
                }

                return parts.Count > 0 ? parts.ToArray() : null;
            }

            case JsonValueKind.String:
            {
                var executable = commandElement.GetString();
                if (string.IsNullOrWhiteSpace(executable))
                {
                    return null;
                }

                // Also merge a separate "args" array when command is a bare string.
                if (!args.TryGetProperty("args", out var extraArgs))
                {
                    return new[] { executable };
                }

                // A non-array "args" value is a protocol violation.
                var parts = new List<string> { executable };
                if (extraArgs.ValueKind != JsonValueKind.Array || !TryAppendStrings(extraArgs, parts))
                {
                    malformed = true;
                    return null;
                }

                return parts.ToArray();
            }

            default:
                return null;
        }
    }

    /// <summary>
    /// Appends every element of a JSON array to <paramref name="target"/>; stops and
    /// returns <c>false</c> at the first element that is not a JSON string.
    /// </summary>
    private static bool TryAppendStrings(JsonElement array, List<string> target)
    {
        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.String)
            {
                return false;
            }

            target.Add(item.GetString() ?? "");
        }

        return true;
    }

    /// <summary>
    /// Returns the string value of <paramref name="key"/>, or <c>null</c> when the
    /// arguments are not an object, the key is absent, or the value is not a string.
    /// </summary>
    private static string? TryGetString(JsonElement args, string key)
    {
        if (args.ValueKind == JsonValueKind.Object &&
            args.TryGetProperty(key, out var element) &&
            element.ValueKind == JsonValueKind.String)
        {
            return element.GetString();
        }

        return null;
    }
}
|
||||
@ -0,0 +1,85 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Either a CanonicalCommandIdentity (IsResolved=true) or a typed denial (IsResolved=false).
/// Produced by ExecApprovalV2Normalizer; consumed by the coordinator pipeline (PR7).
/// </summary>
public sealed class ExecApprovalV2NormalizationOutcome
{
    // Success: only Identity is populated.
    private ExecApprovalV2NormalizationOutcome(CanonicalCommandIdentity identity) =>
        (IsResolved, Identity) = (true, identity);

    // Failure: only Error is populated.
    private ExecApprovalV2NormalizationOutcome(ExecApprovalV2Result error) =>
        (IsResolved, Error) = (false, error);

    /// <summary><c>true</c> for outcomes created by <see cref="Ok"/>, <c>false</c> for <see cref="Fail"/>.</summary>
    public bool IsResolved { get; }

    /// <summary>The canonical identity; <c>null</c> on failure outcomes.</summary>
    public CanonicalCommandIdentity? Identity { get; }

    /// <summary>The typed denial; <c>null</c> on success outcomes.</summary>
    public ExecApprovalV2Result? Error { get; }

    /// <summary>Creates a resolved outcome carrying <paramref name="identity"/>.</summary>
    public static ExecApprovalV2NormalizationOutcome Ok(CanonicalCommandIdentity identity)
    {
        return new ExecApprovalV2NormalizationOutcome(identity);
    }

    /// <summary>Creates an unresolved outcome carrying <paramref name="error"/>.</summary>
    public static ExecApprovalV2NormalizationOutcome Fail(ExecApprovalV2Result error)
    {
        return new ExecApprovalV2NormalizationOutcome(error);
    }
}
|
||||
|
||||
/// <summary>
/// Rail 18 steps 2-4: normalize command form → resolve executable → build canonical identity.
/// Stateless — safe to call concurrently.
/// </summary>
public static class ExecApprovalV2Normalizer
{
    /// <summary>
    /// Turns a validated request into a <see cref="CanonicalCommandIdentity"/>, or a
    /// resolution-failed outcome when neither the singular nor the allowlist resolution
    /// yields anything.
    /// </summary>
    /// <param name="request">Output of the structural validation phase.</param>
    public static ExecApprovalV2NormalizationOutcome Normalize(ValidatedRunRequest request)
    {
        var argv = request.Argv;
        var cwd = request.Cwd;
        var env = request.Env as IReadOnlyDictionary<string, string>;

        // displayCommand is always derived from argv, never from rawCommand (research doc 05 decision 2).
        var displayCommand = ShellQuoting.FormatExecCommand(argv);

        // rawCommand is null in Windows v1 (system.run does not carry it; research doc 05 OQ-V4).
        // EvaluationRawCommand stays null — correct and documented conservative output.
        string? evaluationRawCommand = null;

        // Singular resolution for state machine.
        var resolution = ExecCommandResolver.Resolve(argv, cwd, env);

        // Multi-segment resolution for allowlist.
        // Empty list is fail-closed: no allowlist satisfaction possible (research doc 04 R2).
        // An empty list is NOT itself a denial at this step — the evaluator decides.
        var allowlistResolutions = ExecCommandResolver.ResolveForAllowlist(argv, evaluationRawCommand, cwd, env);

        // UX patterns for prompting.
        var allowAlwaysPatterns = ExecCommandResolver.ResolveAllowAlwaysPatterns(argv, cwd, env);

        // Rail 6: if argv is non-empty but resolution is entirely impossible, deny.
        // "Ambiguous or inconsistent" → typed deny, not silent allow.
        var nothingResolved = resolution is null && allowlistResolutions.Count == 0;
        if (nothingResolved)
        {
            return Fail("executable-resolution-failed");
        }

        return ExecApprovalV2NormalizationOutcome.Ok(new CanonicalCommandIdentity(
            command: argv,
            displayCommand: displayCommand,
            evaluationRawCommand: evaluationRawCommand,
            resolution: resolution,
            allowlistResolutions: allowlistResolutions,
            allowAlwaysPatterns: allowAlwaysPatterns,
            cwd: cwd,
            timeoutMs: request.TimeoutMs,
            env: env,
            agentId: request.AgentId,
            sessionKey: request.SessionKey));
    }

    /// <summary>Wraps a reason code in a resolution-failed outcome.</summary>
    private static ExecApprovalV2NormalizationOutcome Fail(string reason)
    {
        var denial = ExecApprovalV2Result.ResolutionFailed(reason);
        return ExecApprovalV2NormalizationOutcome.Fail(denial);
    }
}
|
||||
@ -0,0 +1,16 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Placeholder V2 handler used until a real handler is installed (rail 19).
/// Every call completes synchronously with <see cref="ExecApprovalV2Code.Unavailable"/>;
/// the request and correlation id are ignored, so the V2 path stays inert and never
/// falls through to the legacy path.
/// </summary>
public sealed class ExecApprovalV2NullHandler : IExecApprovalV2Handler
{
    /// <summary>Shared instance of the handler.</summary>
    public static readonly ExecApprovalV2NullHandler Instance = new();

    /// <summary>Returns an already-completed task carrying the default "unavailable" result.</summary>
    /// <param name="request">Ignored.</param>
    /// <param name="correlationId">Ignored.</param>
    public Task<ExecApprovalV2Result> HandleAsync(OpenClaw.Shared.NodeInvokeRequest request, string correlationId)
    {
        var unavailable = ExecApprovalV2Result.Unavailable();
        return Task.FromResult(unavailable);
    }
}
|
||||
50
src/OpenClaw.Shared/ExecApprovals/ExecApprovalV2Result.cs
Normal file
50
src/OpenClaw.Shared/ExecApprovals/ExecApprovalV2Result.cs
Normal file
@ -0,0 +1,50 @@
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Stable result codes for the V2 exec approval path (rail 7).
/// The numeric values are spelled out so that reordering members cannot renumber them.
/// </summary>
public enum ExecApprovalV2Code
{
    Unavailable = 0,
    SecurityDeny = 1,
    AllowlistMiss = 2,
    UserDenied = 3,
    ValidationFailed = 4,
    ResolutionFailed = 5,
}
|
||||
|
||||
/// <summary>
/// Typed result of the V2 exec approval path: a stable <see cref="ExecApprovalV2Code"/>
/// paired with a human-readable reason. Instances are created only through the static
/// factory methods, one per code.
/// </summary>
public sealed class ExecApprovalV2Result
{
    private ExecApprovalV2Result(ExecApprovalV2Code code, string reason)
    {
        Code = code;
        Reason = reason;
    }

    /// <summary>Stable code identifying the outcome.</summary>
    public ExecApprovalV2Code Code { get; }

    /// <summary>Human-readable explanation supplied by the caller of the factory.</summary>
    public string Reason { get; }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.Unavailable"/> result.</summary>
    public static ExecApprovalV2Result Unavailable(string reason = "Handler not available")
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.Unavailable, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.SecurityDeny"/> result.</summary>
    public static ExecApprovalV2Result SecurityDeny(string reason)
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.SecurityDeny, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.AllowlistMiss"/> result.</summary>
    public static ExecApprovalV2Result AllowlistMiss(string reason)
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.AllowlistMiss, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.UserDenied"/> result.</summary>
    public static ExecApprovalV2Result UserDenied(string reason)
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.UserDenied, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.ValidationFailed"/> result.</summary>
    public static ExecApprovalV2Result ValidationFailed(string reason)
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.ValidationFailed, reason);
    }

    /// <summary>Creates an <see cref="ExecApprovalV2Code.ResolutionFailed"/> result.</summary>
    public static ExecApprovalV2Result ResolutionFailed(string reason)
    {
        return new ExecApprovalV2Result(ExecApprovalV2Code.ResolutionFailed, reason);
    }

    /// <summary>Formats the result as "<c>Code: Reason</c>".</summary>
    public override string ToString()
    {
        return Code.ToString() + ": " + Reason;
    }
}
|
||||
501
src/OpenClaw.Shared/ExecApprovals/ExecCommandResolution.cs
Normal file
501
src/OpenClaw.Shared/ExecApprovals/ExecCommandResolution.cs
Normal file
@ -0,0 +1,501 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Resolved identity of a single executable token.
// Shape mirrors the macOS ExecCommandResolution struct.
//   RawExecutable  - the executable token the resolution was built from.
//   ResolvedPath   - absolute path of the executable, or null when none was found.
//   ExecutableName - file name of the resolved path, or the token itself when unresolved.
//   Cwd            - working directory the resolution was performed against, if any.
public readonly record struct ExecCommandResolution(
    string RawExecutable,
    string? ResolvedPath,
    string ExecutableName,
    string? Cwd);
|
||||
|
||||
// The three resolution functions required by the pipeline.
|
||||
// Resolve() → singular, for state machine
|
||||
// ResolveForAllowlist() → multi-segment, fail-closed, for allowlist matching
|
||||
// ResolveAllowAlwaysPatterns() → UX suggestions for prompt
|
||||
internal static class ExecCommandResolver
|
||||
{
|
||||
// Windows executable extensions, tried in order for basename search.
|
||||
private static readonly string[] s_extensions = [".exe", ".cmd", ".bat", ".com"];
|
||||
|
||||
// ── Public API ───────────────────────────────────────────────────────────
|
||||
|
||||
// Singular resolution of the primary executable, used by the state machine.
// Leading `env` wrappers are stripped with ExecEnvInvocationUnwrapper.UnwrapForResolution
// before the first remaining token is resolved.
// Returns null when nothing remains, when the leading token is blank, or when
// ResolveExecutable gives up.
internal static ExecCommandResolution? Resolve(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count == 0)
    {
        return null;
    }

    var executableToken = unwrapped[0].Trim();
    if (executableToken.Length == 0)
    {
        return null;
    }

    return ResolveExecutable(executableToken, cwd, env);
}
|
||||
|
||||
// Multi-segment resolution for allowlist matching.
// Detects shell wrappers, splits the inline payload into chain segments, and resolves one
// executable per segment. For a direct (non-wrapper) command a single resolution is returned.
// Returns an empty list (fail-closed) on any ambiguity: env modifiers at any nesting level,
// a wrapper without an inline command, an unsplittable chain, an unparsable or unresolvable
// segment, or a PowerShell -EncodedCommand invocation.
internal static IReadOnlyList<ExecCommandResolution> ResolveForAllowlist(
    IReadOnlyList<string> command,
    string? evaluationRawCommand,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    // Fail-closed: any env invocation with modifiers (flags or VAR=val assignments).
    // The allowlist cannot verify which executable will actually run under a modified env —
    // the resolver uses the original env while execution uses the modified one.
    // Every nested env layer is inspected, not just the outermost one: `env env PATH=x git`
    // has a modifier-free outer layer but still runs git under a modified environment.
    var layer = command;
    for (var depth = 0; depth < ExecEnvInvocationUnwrapper.MaxWrapperDepth; depth++)
    {
        if (layer.Count == 0 || !ExecCommandToken.IsEnv(layer[0].Trim()))
        {
            break;
        }

        if (ExecEnvInvocationUnwrapper.HasModifiers(layer))
        {
            return [];
        }

        var inner = ExecEnvInvocationUnwrapper.Unwrap(layer);
        if (inner is null)
        {
            break; // nothing left to unwrap; the remaining checks handle the command as-is
        }

        layer = inner;
    }

    var wrapper = ExecShellWrapperNormalizer.Extract(command);
    if (wrapper.IsWrapper)
    {
        if (wrapper.InlineCommand is null)
        {
            return [];
        }

        var segments = SplitShellCommandChain(wrapper.InlineCommand);
        if (segments is null)
        {
            return [];
        }

        var resolutions = new List<ExecCommandResolution>(segments.Count);
        foreach (var segment in segments)
        {
            var token = ParseFirstToken(segment);
            if (token is null)
            {
                return [];
            }

            // -EncodedCommand and aliases in segment position: fail-closed (research doc 04 S1).
            if (SegmentUsesEncodedCommand(segment, token))
            {
                return [];
            }

            var res = ResolveExecutable(token, cwd, env);
            if (res is null)
            {
                return [];
            }

            resolutions.Add(res.Value);
        }

        return resolutions;
    }

    // Direct exec: fail-closed if powershell/pwsh is invoked directly with -EncodedCommand.
    // Covers top-level `["powershell", "-enc", ...]` and transparent `["env", "pwsh", "-enc", ...]`.
    if (DirectExecUsesEncodedCommand(command))
    {
        return [];
    }

    var single = ResolveSingle(command, evaluationRawCommand, cwd, env);
    return single is null ? [] : [single.Value];
}
|
||||
|
||||
// Produces allowlist pattern suggestions to show when prompting the user.
// Unlike ResolveForAllowlist this does not fail closed on env modifiers: the pattern
// collector unwraps env wrappers so the underlying executable is surfaced.
// Patterns are de-duplicated case-insensitively and returned in discovery order.
internal static IReadOnlyList<string> ResolveAllowAlwaysPatterns(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var collected = new List<string>();
    var alreadySeen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    CollectPatterns(command, cwd, env, alreadySeen, collected, 0);

    return collected;
}
|
||||
|
||||
// ── Resolution helpers ───────────────────────────────────────────────────
|
||||
|
||||
// Resolves one executable for a direct (non-wrapper) command.
// When rawCommand is non-blank and its first token can be parsed, that token is resolved;
// otherwise resolution falls back to Resolve on the argv form.
private static ExecCommandResolution? ResolveSingle(
    IReadOnlyList<string> command,
    string? rawCommand,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var rawToken = string.IsNullOrWhiteSpace(rawCommand) ? null : ParseFirstToken(rawCommand);

    return rawToken is null
        ? Resolve(command, cwd, env)
        : ResolveExecutable(rawToken, cwd, env);
}
|
||||
|
||||
// Turns one executable token into an ExecCommandResolution.
//  - A token containing '/' or '\' is treated as a path: it is rejected (null) when it has
//    a colon outside the volume position, otherwise made absolute against cwd (or the
//    process working directory when cwd is blank). Existence is not checked in this branch.
//  - Any other token is looked up on the search path; ResolvedPath stays null when the
//    lookup finds nothing.
// Any exception is swallowed and reported as null (fail-closed, intentionally broad).
private static ExecCommandResolution? ResolveExecutable(
    string rawExecutable,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    try
    {
        var candidate = ExpandTilde(rawExecutable);
        var looksLikePath = candidate.Contains('/') || candidate.Contains('\\');

        string? located;
        if (!looksLikePath)
        {
            located = FindInPath(candidate, GetSearchPaths(env), GetPathExtensions(env));
        }
        else if (HasNonStandardColon(candidate))
        {
            // ADS and other non-standard colon forms are never resolved.
            return null;
        }
        else if (Path.IsPathFullyQualified(candidate))
        {
            located = Path.GetFullPath(candidate);
        }
        else
        {
            var baseDirectory = string.IsNullOrWhiteSpace(cwd)
                ? Directory.GetCurrentDirectory()
                : cwd.Trim();
            located = Path.GetFullPath(candidate, baseDirectory);
        }

        var displayName = located is null ? candidate : Path.GetFileName(located);
        return new ExecCommandResolution(candidate, located, displayName, cwd);
    }
    catch
    {
        // fail-closed; add diagnostic tracing here if needed
        return null;
    }
}
|
||||
|
||||
// ── Shell command chain splitting ────────────────────────────────────────
|
||||
|
||||
// Splits a shell command string into chain segments on ;, &&, ||, |, |&, & and newline.
// Quotes and backslash escapes are tracked so delimiters inside them do not split.
// Returns null (fail-closed) when:
//  - the command is blank, or any segment would be empty;
//  - command/process substitution is present: $(...), `...`, <(...), >(...);
//  - the string ends inside a quote or right after a backslash.
// NOTE(review): backslash is treated as an escape character no matter which shell will run
// the payload; cmd.exe and PowerShell treat `\` as a literal path separator. Confirm callers
// are aware of that mismatch.
private static IReadOnlyList<string>? SplitShellCommandChain(string command)
{
    var text = command.Trim();
    if (text.Length == 0)
    {
        return null;
    }

    var pieces = new List<string>();
    var buffer = new StringBuilder();
    var singleQuoted = false;
    var doubleQuoted = false;
    var escapePending = false;

    var pos = 0;
    while (pos < text.Length)
    {
        var c = text[pos];
        char? following = pos + 1 < text.Length ? text[pos + 1] : null;
        char? preceding = pos > 0 ? text[pos - 1] : null;

        if (escapePending)
        {
            // Character after a backslash: taken literally.
            escapePending = false;
        }
        else if (c == '\\' && !singleQuoted)
        {
            escapePending = true;
        }
        else if (c == '\'' && !doubleQuoted)
        {
            singleQuoted = !singleQuoted;
        }
        else if (c == '"' && !singleQuoted)
        {
            doubleQuoted = !doubleQuoted;
        }
        else
        {
            // Fail-closed on command/process substitution (not inside single quotes).
            if (!singleQuoted && IsCommandSubstitution(c, following, doubleQuoted))
            {
                return null;
            }

            // Delimiters only count outside of any quoting.
            if (!singleQuoted && !doubleQuoted && DelimiterStep(c, preceding, following) is int width)
            {
                var piece = buffer.ToString().Trim();
                if (piece.Length == 0)
                {
                    return null;
                }

                pieces.Add(piece);
                buffer.Clear();
                pos += width; // skip the whole delimiter (1 or 2 characters)
                continue;
            }
        }

        buffer.Append(c);
        pos++;
    }

    if (escapePending || singleQuoted || doubleQuoted)
    {
        return null;
    }

    var finalPiece = buffer.ToString().Trim();
    if (finalPiece.Length == 0)
    {
        return null;
    }

    pieces.Add(finalPiece);
    return pieces;
}
|
||||
|
||||
// True when `ch` (looking one character ahead) starts a command or process substitution.
// A backtick always counts. `$(` counts inside and outside double quotes; `<(` and `>(`
// count only outside double quotes.
private static bool IsCommandSubstitution(char ch, char? next, bool inDouble)
{
    if (ch == '`')
    {
        return true;
    }

    if (next != '(')
    {
        return false;
    }

    return ch == '$' || (!inDouble && (ch == '<' || ch == '>'));
}
|
||||
|
||||
// Classifies `ch` as a chain delimiter and reports how many characters the delimiter spans.
// Returns 1 for ';', newline, '|' and a lone '&'; 2 for '&&', '||' and '|&'; null when `ch`
// is not a delimiter. A '&' adjacent to '>' (as in `>&` or `&>`) is not a delimiter.
private static int? DelimiterStep(char ch, char? prev, char? next)
{
    switch (ch)
    {
        case ';':
        case '\n':
            return 1;

        case '&' when next == '&':
            return 2;

        case '&':
            if (prev == '>' || next == '>')
            {
                return null;
            }

            return 1;

        case '|':
            return (next == '|' || next == '&') ? 2 : 1;

        default:
            return null;
    }
}
|
||||
|
||||
// Extracts the first word of a command string.
//  - Unquoted: everything up to the first space or tab.
//  - Quoted (' or "): the text between the opening quote and the next identical quote,
//    plus any suffix glued to the closing quote up to the next space or tab, so
//    `"git".exe` yields `git.exe`.
// Returns null for a blank command, an unclosed quote, or an empty quoted string.
private static string? ParseFirstToken(string command)
{
    char[] blanks = [' ', '\t'];

    var text = command.Trim();
    if (text.Length == 0)
    {
        return null;
    }

    var opener = text[0];
    if (opener != '"' && opener != '\'')
    {
        var cut = text.IndexOfAny(blanks);
        return cut < 0 ? text : text.Substring(0, cut);
    }

    var closeAt = text.IndexOf(opener, 1);
    if (closeAt < 0)
    {
        return null; // unclosed quote — fail-closed; do not guess the token
    }

    if (closeAt == 1)
    {
        return null; // empty quoted string
    }

    var quoted = text.Substring(1, closeAt - 1);

    var tail = text.Substring(closeAt + 1);
    var tailEnd = tail.IndexOfAny(blanks);
    if (tailEnd >= 0)
    {
        tail = tail.Substring(0, tailEnd);
    }

    return quoted + tail;
}
|
||||
|
||||
// ── allowAlwaysPatterns collection ───────────────────────────────────────
|
||||
|
||||
// Appends allowlist pattern suggestions for `command` to `patterns`, using `seen` to skip
// duplicates. Each pattern is the resolved path when there is one, otherwise the raw token.
//  - Shell wrapper with an inline command: one pattern per chain segment; segments that
//    cannot be parsed or resolved are skipped, and an unsplittable chain yields nothing.
//    -EncodedCommand is NOT treated as fail-closed here: this output is UX only.
//  - Anything else: env wrappers (including ones with modifiers) are unwrapped and the
//    first remaining token is used.
// Does nothing when the command is empty or `depth` is 3 or more.
private static void CollectPatterns(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env,
    HashSet<string> seen,
    List<string> patterns,
    int depth)
{
    if (command.Count == 0 || depth >= 3)
    {
        return;
    }

    void Record(ExecCommandResolution found)
    {
        var candidate = found.ResolvedPath ?? found.RawExecutable;
        if (seen.Add(candidate))
        {
            patterns.Add(candidate);
        }
    }

    var parsed = ExecShellWrapperNormalizer.Extract(command);
    if (parsed.IsWrapper && parsed.InlineCommand is { } inlineCommand)
    {
        var parts = SplitShellCommandChain(inlineCommand);
        if (parts is null)
        {
            return;
        }

        foreach (var part in parts)
        {
            var head = ParseFirstToken(part);
            if (head is null)
            {
                continue;
            }

            if (ResolveExecutable(head, cwd, env) is { } hit)
            {
                Record(hit);
            }
        }

        return;
    }

    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count == 0)
    {
        return;
    }

    var executableToken = unwrapped[0].Trim();
    if (executableToken.Length == 0)
    {
        return;
    }

    if (ResolveExecutable(executableToken, cwd, env) is { } direct)
    {
        Record(direct);
    }
}
|
||||
|
||||
// ── -EncodedCommand detection ─────────────────────────────────────────────
|
||||
|
||||
// Research doc 04 S1: a chain segment that runs PowerShell with -EncodedCommand (or an
// alias / prefix abbreviation) carries an opaque base64 payload, so callers fail closed.
// Returns true only when the first token is a PowerShell binary (powershell / pwsh) AND a
// later token is recognised by IsEncodedCommandFlag; `powershell -c 'Get-Date'` is false.
// Tokens are whitespace-separated; a quoted run counts as one token (so `"-enc"` is seen).
// Scanning stops at a bare `--`.
private static bool SegmentUsesEncodedCommand(string segment, string firstToken)
{
    var shellName = ExecCommandToken.NormalizedBasename(firstToken);
    if (shellName != "powershell" && shellName != "pwsh")
    {
        return false;
    }

    var cursor = 0;
    while (true)
    {
        while (cursor < segment.Length && char.IsWhiteSpace(segment[cursor]))
        {
            cursor++;
        }

        if (cursor >= segment.Length)
        {
            return false;
        }

        var start = cursor;
        var lead = segment[cursor];
        if (lead == '"' || lead == '\'')
        {
            // Consume through the matching quote, or to the end when it never closes.
            cursor++;
            while (cursor < segment.Length && segment[cursor] != lead)
            {
                cursor++;
            }

            if (cursor < segment.Length)
            {
                cursor++; // include the closing quote
            }
        }
        else
        {
            while (cursor < segment.Length && !char.IsWhiteSpace(segment[cursor]))
            {
                cursor++;
            }
        }

        var word = segment.Substring(start, cursor - start);
        if (IsEncodedCommandFlag(word))
        {
            return true;
        }

        if (word == "--")
        {
            return false;
        }
    }
}
|
||||
|
||||
// Returns true when a raw flag token (possibly wrapped in matching quotes, possibly with a
// `:value` or `=value` suffix) spells PowerShell's -EncodedCommand parameter.
// Accepted names (case-insensitive): `ec` and every prefix of `encodedcommand`
// (`e`, `en`, `enc`, ... `encodedcommand`).
// Accepted switch prefixes: ASCII hyphen, en dash (U+2013), em dash (U+2014), horizontal
// bar (U+2015), a doubled dash (`--enc`), and `/`. PowerShell's own argument parser treats
// these as equivalent, so recognising only `-` would let `/enc` or `--enc` slip through.
// Over-matching is acceptable here: callers treat a match as fail-closed.
private static bool IsEncodedCommandFlag(string rawToken)
{
    const string FullName = "encodedcommand";

    var t = rawToken;
    if (t.Length >= 2 && t[0] is '"' or '\'' && t[^1] == t[0])
    {
        t = t[1..^1]; // strip matching outer quotes
    }

    if (t.Length < 2)
    {
        return false; // needs a prefix character plus at least one name character
    }

    var lead = t[0];
    var isDash = lead is '-' or '\u2013' or '\u2014' or '\u2015';
    if (!isDash && lead != '/')
    {
        return false;
    }

    // A doubled dash is equivalent to a single one.
    var nameStart = (isDash && t[1] == lead) ? 2 : 1;
    var name = t[nameStart..];

    // Strip a trailing :value or =value (e.g. -EncodedCommand:base64).
    var sep = name.IndexOfAny(['=', ':']);
    if (sep >= 0)
    {
        name = name[..sep];
    }

    if (name.Length == 0)
    {
        return false; // bare "-", "--", "-:x", ...
    }

    name = name.ToLowerInvariant();
    return name == "ec" || FullName.StartsWith(name, StringComparison.Ordinal);
}
|
||||
|
||||
// True when a direct (non-wrapper) command runs powershell/pwsh with an argument that
// IsEncodedCommandFlag recognises. env wrappers are unwrapped first, so
// `["env", "pwsh", "-enc", ...]` is caught as well. Arguments after a bare `--` are ignored.
private static bool DirectExecUsesEncodedCommand(IReadOnlyList<string> command)
{
    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count < 2)
    {
        return false;
    }

    var shellName = ExecCommandToken.NormalizedBasename(unwrapped[0].Trim());
    if (shellName != "powershell" && shellName != "pwsh")
    {
        return false;
    }

    foreach (var argument in unwrapped.Skip(1).Select(a => a.Trim()))
    {
        if (argument == "--")
        {
            return false;
        }

        if (IsEncodedCommandFlag(argument))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
// ── PATH search ───────────────────────────────────────────────────────────
|
||||
|
||||
// Looks up `key` in `env` ignoring case (ordinal) and returns the value of the first
// matching entry in enumeration order. Returns null when env is null or no key matches.
private static string? GetEnvValueIgnoreCase(IReadOnlyDictionary<string, string>? env, string key)
{
    if (env is null)
    {
        return null;
    }

    foreach (var (name, value) in env)
    {
        if (key.Equals(name, StringComparison.OrdinalIgnoreCase))
        {
            return value;
        }
    }

    return null;
}
|
||||
|
||||
// Searches each directory in `searchPaths`, in order, for `name`.
// Within a directory every extension in `extensions` is tried first (name + ext), and the
// bare name only afterwards, so an extension-less file cannot shadow a PATHEXT binary with
// the same stem. The extensions are appended even when `name` already has one
// (git.exe → git.exe.exe, …); the bare-name fallback covers that case.
// Returns the normalized path of the first existing file, or null when nothing matches.
private static string? FindInPath(
    string name,
    IReadOnlyList<string> searchPaths,
    IReadOnlyList<string> extensions)
{
    foreach (var directory in searchPaths)
    {
        if (string.IsNullOrEmpty(directory))
        {
            continue;
        }

        var stem = Path.Combine(directory, name);

        // Lazy evaluation keeps the probe order: extensions first, bare name last.
        var match = extensions
            .Select(extension => stem + extension)
            .FirstOrDefault(File.Exists);
        match ??= File.Exists(stem) ? stem : null;

        if (match is not null)
        {
            return TryNormalizePath(match);
        }
    }

    return null;
}
|
||||
|
||||
// Returns the directories to search for bare executable names.
// Preference order: PATH from the request env (case-insensitive key), then PATH of the
// current process, then the built-in well-known directories. A source is skipped when it
// is missing, empty, or contains no non-empty entries.
private static IReadOnlyList<string> GetSearchPaths(IReadOnlyDictionary<string, string>? env)
{
    static string[]? SplitOrNull(string? raw)
    {
        if (string.IsNullOrEmpty(raw))
        {
            return null;
        }

        var entries = raw.Split(Path.PathSeparator, StringSplitOptions.RemoveEmptyEntries);
        return entries.Length > 0 ? entries : null;
    }

    return SplitOrNull(GetEnvValueIgnoreCase(env, "PATH"))
        ?? SplitOrNull(Environment.GetEnvironmentVariable("PATH"))
        ?? WellKnownPaths();
}
|
||||
|
||||
// Returns the executable extensions to probe during the PATH search.
// Preference order: PATHEXT from the request env (case-insensitive key), then PATHEXT of
// the current process, then the built-in default list. Values are split on ';' and a
// source is skipped when it is missing, empty, or contains no non-empty entries.
private static IReadOnlyList<string> GetPathExtensions(IReadOnlyDictionary<string, string>? env)
{
    static string[]? SplitOrNull(string? raw)
    {
        if (string.IsNullOrEmpty(raw))
        {
            return null;
        }

        var entries = raw.Split(';', StringSplitOptions.RemoveEmptyEntries);
        return entries.Length > 0 ? entries : null;
    }

    return SplitOrNull(GetEnvValueIgnoreCase(env, "PATHEXT"))
        ?? SplitOrNull(Environment.GetEnvironmentVariable("PATHEXT"))
        ?? s_extensions;
}
|
||||
|
||||
// Last-resort search directories, used when no PATH value is available:
// <Windows>\System32, the system folder, System32\OpenSSH, and the Git for Windows
// usr\bin and bin directories under Program Files.
// NOTE(review): the first two entries usually point at the same directory — presumably
// harmless for a search list; confirm the duplicate is intentional.
private static IReadOnlyList<string> WellKnownPaths()
{
    var windowsDirectory = Environment.GetFolderPath(Environment.SpecialFolder.Windows);
    var system32 = Path.Combine(windowsDirectory, "System32");
    var systemDirectory = Environment.GetFolderPath(Environment.SpecialFolder.System);
    var programFiles = Environment.GetFolderPath(Environment.SpecialFolder.ProgramFiles);

    var openSsh = Path.Combine(system32, "OpenSSH");
    var gitUsrBin = Path.Combine(programFiles, "Git", "usr", "bin");
    var gitBin = Path.Combine(programFiles, "Git", "bin");

    return [system32, systemDirectory, openSsh, gitUsrBin, gitBin];
}
|
||||
|
||||
// ── Path helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
// Expands a leading `~` to the current user's profile directory.
// Only `~` on its own, `~/...` and `~\...` are expanded. Tokens such as `~user/bin` or a
// file literally named `~tmp.exe` are returned unchanged — gluing the profile path onto
// them would fabricate a path the caller never wrote.
// The token is also returned unchanged when the profile directory cannot be determined,
// so `~/x` never silently turns into the root-relative `/x`.
private static string ExpandTilde(string path)
{
    if (path.Length == 0 || path[0] != '~')
    {
        return path;
    }

    if (path.Length > 1 && path[1] != '/' && path[1] != '\\')
    {
        return path;
    }

    var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
    if (string.IsNullOrEmpty(home))
    {
        return path;
    }

    return path.Length == 1 ? home : home + path[1..];
}
|
||||
|
||||
// Returns true when `path` contains a ':' anywhere other than the drive-letter position,
// which covers NTFS alternate data streams (file.exe:stream) and other non-standard forms.
// Research doc 04 section 3 / S3.
//  - A leading extended-length prefix `\\?\` is ignored (`\\?\C:\x` is valid).
//  - Paths that still start with `\\` (UNC and device forms) have no drive letter, so ANY
//    colon in them is rejected; this also closes `\\server\share\file.exe:stream`.
//  - Otherwise exactly one colon is allowed, at index 1 after an ASCII letter.
private static bool HasNonStandardColon(string path)
{
    var effective = path.StartsWith(@"\\?\", StringComparison.Ordinal) ? path[4..] : path;

    if (effective.StartsWith(@"\\", StringComparison.Ordinal))
    {
        return effective.Contains(':');
    }

    var colonIdx = effective.IndexOf(':');
    if (colonIdx < 0)
    {
        return false; // no colon at all
    }

    // Drive-letter form: a single ASCII letter followed by ':'. Digits or punctuation in
    // the first position are not drive letters and fall through to rejection.
    if (colonIdx == 1 && char.IsAsciiLetter(effective[0]))
    {
        return effective.IndexOf(':', 2) >= 0; // a second colon is never standard
    }

    return true;
}
|
||||
|
||||
// Best-effort normalization of a path found by the PATH search.
// Path.GetFullPath collapses `.` and `..` but does not expand 8.3 short names; a full
// GetLongPathName P/Invoke is left open as OQ-R1 in the research docs.
// If normalization throws, the input is returned unchanged so a hostile path cannot make
// resolution throw.
private static string TryNormalizePath(string path)
{
    try
    {
        return Path.GetFullPath(path);
    }
    catch
    {
        return path;
    }
}
|
||||
}
|
||||
28
src/OpenClaw.Shared/ExecApprovals/ExecCommandToken.cs
Normal file
28
src/OpenClaw.Shared/ExecApprovals/ExecCommandToken.cs
Normal file
@ -0,0 +1,28 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Helpers for classifying command tokens by their file name.
internal static class ExecCommandToken
{
    // Returns the lowercased last path component of a token. Both '\' and '/' count as
    // separators. The extension is kept. A blank token yields the empty string; a token
    // with no final component (e.g. one ending in a separator) is lowercased whole.
    internal static string BasenameLower(string token)
    {
        var text = token.Trim();
        if (text.Length == 0)
        {
            return string.Empty;
        }

        var leaf = Path.GetFileName(text.Replace('\\', '/'));
        var chosen = leaf.Length > 0 ? leaf : text;
        return chosen.ToLowerInvariant();
    }

    // Returns BasenameLower(token) with one trailing ".exe" removed, if present.
    internal static string NormalizedBasename(string token)
    {
        const string ExeSuffix = ".exe";

        var lowered = BasenameLower(token);
        if (!lowered.EndsWith(ExeSuffix, StringComparison.OrdinalIgnoreCase))
        {
            return lowered;
        }

        return lowered.Substring(0, lowered.Length - ExeSuffix.Length);
    }

    // True when the token names the `env` utility (any directory, optional .exe suffix).
    internal static bool IsEnv(string token)
    {
        return NormalizedBasename(token) == "env";
    }
}
|
||||
100
src/OpenClaw.Shared/ExecApprovals/ExecEnvInvocationUnwrapper.cs
Normal file
100
src/OpenClaw.Shared/ExecApprovals/ExecEnvInvocationUnwrapper.cs
Normal file
@ -0,0 +1,100 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Strips `env [OPTIONS] [VAR=VAL...] COMMAND [ARGS...]` so the true executable can be resolved.
// Fail-closed: Unwrap returns null when an unknown flag is encountered or no command token
// follows the options. Mirrors ExecEnvInvocationUnwrapper in the windows-app reference.
internal static class ExecEnvInvocationUnwrapper
{
    // Upper bound on how many nested `env` layers are peeled off.
    internal const int MaxWrapperDepth = 4;

    // Matches a leading NAME= assignment (NAME starts with a letter or underscore).
    private static readonly Regex s_envAssignment =
        new(@"^[A-Za-z_][A-Za-z0-9_]*=", RegexOptions.Compiled);

    // Strips one `env` layer. command[0] is assumed to be the env token and is not inspected.
    // Blank tokens, VAR=VAL assignments and known options (with their values) are skipped;
    // `--` or `-` ends option processing. Returns the argv starting at the real COMMAND
    // token, or null when an unknown flag is seen or nothing follows the options.
    internal static IReadOnlyList<string>? Unwrap(IReadOnlyList<string> command)
    {
        var cursor = 1;
        var skipNextAsValue = false;

        for (; cursor < command.Count; cursor++)
        {
            var word = command[cursor].Trim();
            if (word.Length == 0)
            {
                continue;
            }

            if (skipNextAsValue)
            {
                // This token is the value of the preceding option.
                skipNextAsValue = false;
                continue;
            }

            if (word is "--" or "-")
            {
                cursor++; // the command starts right after the terminator
                break;
            }

            if (s_envAssignment.IsMatch(word))
            {
                continue;
            }

            if (!word.StartsWith('-'))
            {
                break; // executable token found
            }

            var lowered = word.ToLowerInvariant();
            var optionName = lowered.Split('=', 2)[0];

            if (ExecEnvOptions.FlagOnly.Contains(optionName))
            {
                continue;
            }

            if (ExecEnvOptions.WithValue.Contains(optionName))
            {
                // Without an inline `=value`, the next token is the option's value.
                skipNextAsValue = !lowered.Contains('=');
                continue;
            }

            if (!HasInlineValuePrefix(lowered))
            {
                return null; // unknown flag — fail-closed
            }
        }

        return cursor < command.Count ? command.Skip(cursor).ToList() : null;
    }

    // Returns true when the first non-blank token after `env` is a flag or a VAR=val
    // assignment. `--` ends option processing without touching the environment, so it is
    // not a modifier; `-` alone replaces the environment and therefore is one.
    internal static bool HasModifiers(IReadOnlyList<string> command)
    {
        var first = command
            .Skip(1)
            .Select(token => token.Trim())
            .FirstOrDefault(token => token.Length > 0);

        if (first is null || first == "--")
        {
            return false;
        }

        return first.StartsWith('-') || s_envAssignment.IsMatch(first);
    }

    // Peels off up to MaxWrapperDepth leading `env` layers, for executable resolution only.
    // Stops, returning what it has so far, as soon as the head token is not env or a layer
    // cannot be unwrapped.
    internal static IReadOnlyList<string> UnwrapForResolution(IReadOnlyList<string> command)
    {
        var remaining = command;
        var budget = MaxWrapperDepth;

        while (budget-- > 0 && remaining.Count > 0)
        {
            var head = remaining[0].Trim();
            if (head.Length == 0 || !ExecCommandToken.IsEnv(head))
            {
                break;
            }

            var inner = Unwrap(remaining);
            if (inner is null || inner.Count == 0)
            {
                break;
            }

            remaining = inner;
        }

        return remaining;
    }

    // True when the lowercased token starts with one of the inline-value option prefixes.
    private static bool HasInlineValuePrefix(string lowered)
    {
        foreach (var prefix in ExecEnvOptions.InlineValuePrefixes)
        {
            if (lowered.StartsWith(prefix, StringComparison.Ordinal))
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
38
src/OpenClaw.Shared/ExecApprovals/ExecEnvOptions.cs
Normal file
38
src/OpenClaw.Shared/ExecApprovals/ExecEnvOptions.cs
Normal file
@ -0,0 +1,38 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Option grammar of the `env` command, as used when unwrapping env invocations.
// Mirrors the constants in the windows-app reference (ExecEnvOptions.cs).
// All entries are lowercase and compared ordinally.
// NOTE(review): the long options look like GNU coreutils extensions rather than strict
// POSIX — confirm which env implementation this is meant to model.
internal static class ExecEnvOptions
{
    // Options that take a value: either the next argument or an inline `=value`.
    internal static readonly HashSet<string> WithValue = new(System.StringComparer.Ordinal)
    {
        "-u",
        "--unset",
        "-c",
        "--chdir",
        "-s",
        "--split-string",
        "--default-signal",
        "--ignore-signal",
        "--block-signal",
    };

    // Standalone flags that never take a value.
    internal static readonly HashSet<string> FlagOnly = new(System.StringComparer.Ordinal)
    {
        "-i",
        "--ignore-environment",
        "-0",
        "--null",
    };

    // Prefixes that mark an option carrying its value inline (e.g. `-uFOO`, `--unset=FOO`).
    internal static readonly IReadOnlyList<string> InlineValuePrefixes =
    [
        "-u",
        "-c",
        "-s",
        "--unset=",
        "--chdir=",
        "--split-string=",
        "--default-signal=",
        "--ignore-signal=",
        "--block-signal=",
    ];
}
|
||||
118
src/OpenClaw.Shared/ExecApprovals/ExecShellWrapperNormalizer.cs
Normal file
118
src/OpenClaw.Shared/ExecApprovals/ExecShellWrapperNormalizer.cs
Normal file
@ -0,0 +1,118 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Argv-based shell wrapper detection for the V2 exec approval pipeline.
// Unlike the legacy ExecShellWrapperParser.Expand (string-based, BFS over several levels),
// this normalizer works on an argv list (IReadOnlyList<string>) and recognises exactly one
// wrapper level. Leading env invocations are unwrapped recursively, bounded by
// ExecEnvInvocationUnwrapper.MaxWrapperDepth.
// Rail 18 step 2: normalize command form.
internal static class ExecShellWrapperNormalizer
{
    private enum WrapperKind { Posix, Cmd, PowerShell }

    // Pairs a wrapper family with the executable basenames that select it.
    private sealed record WrapperSpec(WrapperKind Kind, HashSet<string> Names);

    // Flags that introduce an inline script for POSIX-style shells.
    private static readonly HashSet<string> s_posixInlineFlags =
        new(StringComparer.OrdinalIgnoreCase) { "-lc", "-c", "--command" };

    // Flags that introduce an inline script for PowerShell.
    private static readonly HashSet<string> s_powerShellInlineFlags =
        new(StringComparer.OrdinalIgnoreCase) { "-c", "-command", "--command" };

    private static readonly WrapperSpec[] s_specs =
    [
        new(WrapperKind.Posix, new HashSet<string>(StringComparer.OrdinalIgnoreCase)
            { "ash", "sh", "bash", "zsh", "dash", "ksh", "fish" }),
        new(WrapperKind.Cmd, new HashSet<string>(StringComparer.OrdinalIgnoreCase)
            { "cmd", "cmd.exe" }),
        new(WrapperKind.PowerShell, new HashSet<string>(StringComparer.OrdinalIgnoreCase)
            { "powershell", "powershell.exe", "pwsh", "pwsh.exe" }),
    ];

    // Result of wrapper detection: whether argv is a shell wrapper and, if so, its inline script.
    internal sealed record ParsedWrapper(bool IsWrapper, string? InlineCommand);

    // Shared "no wrapper found" result.
    internal static readonly ParsedWrapper NotWrapper = new(false, null);

    // Detects a single-level shell wrapper in argv.
    // rawCommand is always null in Windows v1 (not in system.run protocol; research doc 05 OQ-V4).
    // Only argv is inspected; rawCommand is accepted for API compatibility with future use.
    internal static ParsedWrapper Extract(IReadOnlyList<string> command, string? rawCommand = null)
    {
        return ExtractInner(command, rawCommand, 0);
    }

    // Worker for Extract. depth counts how many env prefixes have been peeled off so far.
    private static ParsedWrapper ExtractInner(
        IReadOnlyList<string> command, string? rawCommand, int depth)
    {
        if (depth >= ExecEnvInvocationUnwrapper.MaxWrapperDepth || command.Count == 0)
        {
            return NotWrapper;
        }

        var head = command[0].Trim();
        if (head.Length == 0)
        {
            return NotWrapper;
        }

        // An env prefix is transparent: strip it and look at what it launches.
        if (ExecCommandToken.IsEnv(head))
        {
            var inner = ExecEnvInvocationUnwrapper.Unwrap(command);
            if (inner is null)
            {
                return NotWrapper;
            }

            return ExtractInner(inner, rawCommand, depth + 1);
        }

        var executableName = ExecCommandToken.NormalizedBasename(head);

        WrapperSpec? matched = null;
        foreach (var candidate in s_specs)
        {
            if (candidate.Names.Contains(executableName))
            {
                matched = candidate;
                break;
            }
        }

        if (matched is null)
        {
            return NotWrapper;
        }

        return ExtractPayload(command, matched) is { } inlineCommand
            ? new ParsedWrapper(true, inlineCommand)
            : NotWrapper;
    }

    // Dispatches to the payload extractor of the matched wrapper family.
    private static string? ExtractPayload(IReadOnlyList<string> command, WrapperSpec spec)
    {
        switch (spec.Kind)
        {
            case WrapperKind.Posix:
                return ExtractPosixPayload(command);
            case WrapperKind.Cmd:
                return ExtractCmdPayload(command);
            case WrapperKind.PowerShell:
                return ExtractPowerShellPayload(command);
            default:
                return null;
        }
    }

    // POSIX shells: the inline flag must be argv[1] and the script is argv[2].
    private static string? ExtractPosixPayload(IReadOnlyList<string> command)
    {
        if (command.Count < 2 || !s_posixInlineFlags.Contains(command[1].Trim()))
        {
            return null;
        }

        if (command.Count < 3)
        {
            return null;
        }

        var script = command[2].Trim();
        return script.Length > 0 ? script : null;
    }

    // cmd: everything after the first "/c" (any case), space-joined, is the script.
    private static string? ExtractCmdPayload(IReadOnlyList<string> command)
    {
        var index = 1;
        while (index < command.Count &&
               !string.Equals(command[index].Trim(), "/c", StringComparison.OrdinalIgnoreCase))
        {
            index++;
        }

        if (index >= command.Count)
        {
            return null;
        }

        var script = string.Join(" ", command.Skip(index + 1)).Trim();
        return script.Length > 0 ? script : null;
    }

    // PowerShell: the script is the single argument following the first inline flag.
    // Blank arguments are skipped; a bare "--" ends the search.
    private static string? ExtractPowerShellPayload(IReadOnlyList<string> command)
    {
        for (var index = 1; index < command.Count; index++)
        {
            var token = command[index].Trim().ToLowerInvariant();
            if (token.Length == 0)
            {
                continue;
            }

            if (token == "--")
            {
                return null;
            }

            if (!s_powerShellInlineFlags.Contains(token))
            {
                continue;
            }

            var scriptIndex = index + 1;
            if (scriptIndex >= command.Count)
            {
                return null;
            }

            var script = command[scriptIndex].Trim();
            return script.Length > 0 ? script : null;
        }

        return null;
    }
}
|
||||
14
src/OpenClaw.Shared/ExecApprovals/IExecApprovalV2Handler.cs
Normal file
14
src/OpenClaw.Shared/ExecApprovals/IExecApprovalV2Handler.cs
Normal file
@ -0,0 +1,14 @@
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
|
||||
/// Seam for the V2 exec approval path (rail 10: UI-free, no WinUI types).
|
||||
/// Implementations decide whether a system.run request is allowed.
|
||||
/// In PR1 only the NullHandler exists; real evaluation arrives in later PRs.
|
||||
/// </summary>
|
||||
public interface IExecApprovalV2Handler
|
||||
{
|
||||
/// <summary>Evaluates one request and asynchronously produces its approval result.</summary>
/// <param name="request">The incoming node invoke request to evaluate.</param>
/// <param name="correlationId">Short identifier propagated through logging for this request.</param>
/// <returns>A task that completes with the <see cref="ExecApprovalV2Result"/> for the request.</returns>
|
||||
Task<ExecApprovalV2Result> HandleAsync(OpenClaw.Shared.NodeInvokeRequest request, string correlationId);
|
||||
}
|
||||
62
src/OpenClaw.Shared/ExecApprovals/ValidatedRunRequest.cs
Normal file
62
src/OpenClaw.Shared/ExecApprovals/ValidatedRunRequest.cs
Normal file
@ -0,0 +1,62 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// A system.run request that has passed structural validation in
/// ExecApprovalV2InputValidator. The validator guarantees that <see cref="Argv"/>
/// has at least one element and that its first element is not blank.
/// </summary>
public sealed class ValidatedRunRequest
{
    /// <summary>Command argument vector; never empty, first element never blank.</summary>
    public string[] Argv { get; }

    /// <summary>Shell value supplied with the request, if any.</summary>
    public string? Shell { get; }

    /// <summary>Working directory supplied with the request, if any.</summary>
    public string? Cwd { get; }

    /// <summary>Timeout for the run, in milliseconds.</summary>
    public int TimeoutMs { get; }

    /// <summary>Environment variables supplied with the request, if any.</summary>
    public IReadOnlyDictionary<string, string>? Env { get; }

    /// <summary>Agent identifier supplied with the request, if any.</summary>
    public string? AgentId { get; }

    /// <summary>Session key supplied with the request, if any.</summary>
    public string? SessionKey { get; }

    // Stores every argument as-is; no copying or validation happens here.
    internal ValidatedRunRequest(
        string[] argv,
        string? shell,
        string? cwd,
        int timeoutMs,
        IReadOnlyDictionary<string, string>? env,
        string? agentId,
        string? sessionKey)
        => (Argv, Shell, Cwd, TimeoutMs, Env, AgentId, SessionKey)
            = (argv, shell, cwd, timeoutMs, env, agentId, sessionKey);
}
|
||||
|
||||
/// <summary>
/// Result of input validation: either a <see cref="ValidatedRunRequest"/>
/// (<see cref="IsValid"/> is true) or a typed denial (<see cref="IsValid"/> is false).
/// Produced by ExecApprovalV2InputValidator; consumed by the coordinator pipeline.
/// </summary>
public sealed class ExecApprovalV2ValidationOutcome
{
    // Single funnel for both factories so the three properties are always set together.
    private ExecApprovalV2ValidationOutcome(
        bool isValid,
        ValidatedRunRequest? request,
        ExecApprovalV2Result? error)
    {
        IsValid = isValid;
        Request = request;
        Error = error;
    }

    /// <summary>True when validation succeeded and <see cref="Request"/> is populated.</summary>
    public bool IsValid { get; }

    /// <summary>The validated request; null for outcomes created by <see cref="Fail"/>.</summary>
    public ValidatedRunRequest? Request { get; }

    /// <summary>The denial result; null for outcomes created by <see cref="Ok"/>.</summary>
    public ExecApprovalV2Result? Error { get; }

    /// <summary>Creates a successful outcome wrapping <paramref name="r"/>.</summary>
    public static ExecApprovalV2ValidationOutcome Ok(ValidatedRunRequest r)
    {
        return new ExecApprovalV2ValidationOutcome(true, r, null);
    }

    /// <summary>Creates a failed outcome carrying the denial <paramref name="e"/>.</summary>
    public static ExecApprovalV2ValidationOutcome Fail(ExecApprovalV2Result e)
    {
        return new ExecApprovalV2ValidationOutcome(false, null, e);
    }
}
|
||||
173
src/OpenClaw.Shared/ExecEnvSanitizer.cs
Normal file
173
src/OpenClaw.Shared/ExecEnvSanitizer.cs
Normal file
@ -0,0 +1,173 @@
|
||||
using System;
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Outcome of <see cref="ExecEnvSanitizer.Sanitize"/>: the variables that survived
/// and the names of the ones that were rejected.
/// </summary>
internal sealed class ExecEnvSanitizeResult
{
    /// <summary>Variables that passed the filter; null when nothing survived.</summary>
    public Dictionary<string, string>? Allowed { get; init; }

    /// <summary>Names that were rejected, in enumeration order of the input.</summary>
    public string[] Blocked { get; init; } = Array.Empty<string>();
}

/// <summary>
/// Filters caller-supplied environment variables: rejects malformed names, a fixed
/// deny-list, loader prefixes (LD_ / DYLD_), and names that look like credentials.
/// </summary>
internal static class ExecEnvSanitizer
{
    // Exact names that are always rejected (case-insensitive).
    private static readonly FrozenSet<string> _blockedNames = new[]
    {
        "PATH",
        "PATHEXT",
        "ComSpec",
        "PSModulePath",
        "NODE_OPTIONS",
        "NODE_PATH",
        "PYTHONPATH",
        "PYTHONSTARTUP",
        "PYTHONUSERBASE",
        "RUBYOPT",
        "RUBYLIB",
        "PERL5OPT",
        "PERL5LIB",
        "PERLIO",
        "GIT_SSH",
        "GIT_SSH_COMMAND",
        "GIT_EXEC_PATH",
        "GIT_PROXY_COMMAND",
        "GIT_ASKPASS",
        "BASH_ENV",
        "ENV",
        "CDPATH",
        "PROMPT_COMMAND",
        "ZDOTDIR",
        "LD_PRELOAD",
        "LD_LIBRARY_PATH",
        "LD_AUDIT",
        "DYLD_INSERT_LIBRARIES",
        "DYLD_LIBRARY_PATH",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SESSION_TOKEN",
        "AZURE_CLIENT_SECRET",
        "GITHUB_TOKEN",
        "GH_TOKEN",
        "NPM_TOKEN",
        "OPENAI_API_KEY"
    }.ToFrozenSet(StringComparer.OrdinalIgnoreCase);

    // A name is credential-like when any single segment equals one of these...
    private static readonly string[] _segmentMarkers =
    [
        "TOKEN", "SECRET", "PASSWORD", "PASSWD", "CREDENTIAL", "CREDENTIALS",
    ];

    // ...or when two adjacent segments equal one of these pairs.
    private static readonly (string First, string Second)[] _compoundMarkers =
    [
        ("API", "KEY"),
        ("ACCESS", "KEY"),
        ("PRIVATE", "KEY"),
        ("CLIENT", "SECRET"),
        ("CONNECTION", "STRING"),
    ];

    /// <summary>
    /// Splits <paramref name="env"/> into allowed entries and blocked names.
    /// A null or empty input is returned unchanged as <c>Allowed</c> with no blocked names.
    /// </summary>
    internal static ExecEnvSanitizeResult Sanitize(Dictionary<string, string>? env)
    {
        if (env is null || env.Count == 0)
        {
            return new ExecEnvSanitizeResult { Allowed = env };
        }

        var kept = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        var rejected = new List<string>();

        foreach (var entry in env)
        {
            if (IsBlocked(entry.Key))
            {
                rejected.Add(entry.Key);
            }
            else
            {
                kept[entry.Key] = entry.Value;
            }
        }

        return new ExecEnvSanitizeResult
        {
            Allowed = kept.Count == 0 ? null : kept,
            Blocked = rejected.ToArray()
        };
    }

    /// <summary>
    /// Returns true when <paramref name="name"/> must not be passed to a child process:
    /// null/blank, containing '=', control characters or whitespace, on the deny-list,
    /// credential-like, or starting with LD_ / DYLD_.
    /// </summary>
    internal static bool IsBlocked(string? name)
    {
        if (string.IsNullOrWhiteSpace(name))
        {
            return true;
        }

        if (ContainsForbiddenCharacter(name))
        {
            return true;
        }

        if (_blockedNames.Contains(name) || HasCredentialMarker(name))
        {
            return true;
        }

        return name.StartsWith("LD_", StringComparison.OrdinalIgnoreCase)
            || name.StartsWith("DYLD_", StringComparison.OrdinalIgnoreCase);
    }

    // True when the name contains '=', any character in U+0000..U+0020 (ASCII controls and
    // space), DEL (U+007F), or a non-ASCII control / whitespace character.
    private static bool ContainsForbiddenCharacter(string name)
    {
        foreach (var ch in name)
        {
            if (ch <= '\u0020' || ch == '=' || ch == '\u007F')
            {
                return true;
            }

            if (ch > '\u007F' && (char.IsControl(ch) || char.IsWhiteSpace(ch)))
            {
                return true;
            }
        }

        return false;
    }

    // Checks the single-segment markers, the adjacent-pair markers, then the CONNSTR substring.
    private static bool HasCredentialMarker(string name)
    {
        foreach (var marker in _segmentMarkers)
        {
            if (HasSegment(name, marker))
            {
                return true;
            }
        }

        foreach (var (first, second) in _compoundMarkers)
        {
            if (HasCompoundMarker(name, first, second))
            {
                return true;
            }
        }

        return name.Contains("CONNSTR", StringComparison.OrdinalIgnoreCase);
    }

    // True when two consecutive segments (split on '_', '-', '.') equal first then second,
    // ignoring case. Empty segments count, so "API__KEY" does not match ("API", "KEY").
    private static bool HasCompoundMarker(string name, string first, string second)
    {
        var remaining = name.AsSpan();
        var previousWasFirst = false;

        while (true)
        {
            var cut = remaining.IndexOfAny('_', '-', '.');
            var piece = cut < 0 ? remaining : remaining[..cut];

            if (previousWasFirst && piece.Equals(second.AsSpan(), StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            previousWasFirst = piece.Equals(first.AsSpan(), StringComparison.OrdinalIgnoreCase);

            if (cut < 0)
            {
                return false;
            }

            remaining = remaining[(cut + 1)..];
        }
    }

    // True when any segment (split on '_', '-', '.') equals the given text, ignoring case.
    private static bool HasSegment(string name, string segment)
    {
        var remaining = name.AsSpan();

        while (true)
        {
            var cut = remaining.IndexOfAny('_', '-', '.');
            var piece = cut < 0 ? remaining : remaining[..cut];

            if (piece.Equals(segment.AsSpan(), StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }

            if (cut < 0)
            {
                return false;
            }

            remaining = remaining[(cut + 1)..];
        }
    }
}
|
||||
347
src/OpenClaw.Shared/ExecShellWrapperParser.cs
Normal file
347
src/OpenClaw.Shared/ExecShellWrapperParser.cs
Normal file
@ -0,0 +1,347 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>One command string to evaluate, together with the shell it would run under.</summary>
internal sealed class ExecShellEvaluationTarget
{
    public string Command { get; init; } = "";
    public string? Shell { get; init; }
}

/// <summary>
/// Result of <see cref="ExecShellWrapperParser.Expand"/>: the extra commands to evaluate,
/// or an <see cref="Error"/> when a wrapper payload could not be extracted.
/// </summary>
internal sealed class ExecShellParseResult
{
    public List<ExecShellEvaluationTarget> Targets { get; } = new();
    public string? Error { get; init; }
}

/// <summary>
/// String-based expansion of shell wrappers (cmd /c, powershell -Command / -EncodedCommand,
/// bash/sh -c) and top-level command chains into the individual commands they would run.
/// </summary>
internal static class ExecShellWrapperParser
{
    // Entries deeper than this are dropped, so depths 0..MaxDepth are processed.
    private const int MaxDepth = 4;

    /// <summary>
    /// Breadth-first expansion of <paramref name="command"/>.
    /// Each queue entry is split into top-level segments; every segment that is nested
    /// (depth &gt; 0) or part of a chain becomes a target, de-duplicated case-insensitively
    /// on "shell|command". Segments that are shell wrappers enqueue their payload one
    /// level deeper.
    /// </summary>
    /// <param name="command">Command line to expand. Null/blank yields an empty result.</param>
    /// <param name="shell">Shell the command runs under; blank defaults to "powershell".</param>
    /// <returns>
    /// The collected targets, or a result carrying only <c>Error</c> as soon as any wrapper
    /// has an empty or undecodable payload.
    /// </returns>
    internal static ExecShellParseResult Expand(string command, string? shell = null)
    {
        var result = new ExecShellParseResult();

        if (string.IsNullOrWhiteSpace(command))
        {
            return result;
        }

        var pending = new Queue<(string Command, string? Shell, int Depth)>();
        var seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        pending.Enqueue((command, NormalizeShell(shell), 0));

        while (pending.Count > 0)
        {
            var (current, currentShell, depth) = pending.Dequeue();
            if (string.IsNullOrWhiteSpace(current) || depth > MaxDepth)
            {
                continue;
            }

            var segments = SplitTopLevelCommands(current);
            var hasMultipleSegments = segments.Count > 1;

            foreach (var rawSegment in segments)
            {
                var segment = TrimMatchingQuotes(rawSegment.Trim());
                if (string.IsNullOrWhiteSpace(segment))
                {
                    continue;
                }

                // The unchained top-level command itself is not reported as a target.
                if ((depth > 0 || hasMultipleSegments) && seen.Add($"{currentShell}|{segment}"))
                {
                    result.Targets.Add(new ExecShellEvaluationTarget
                    {
                        Command = segment,
                        Shell = currentShell
                    });
                }

                var wrapped = TryExtractWrappedPayload(segment);
                if (wrapped.Error != null)
                {
                    // Fail closed: discard everything collected so far.
                    return new ExecShellParseResult { Error = wrapped.Error };
                }

                if (!string.IsNullOrWhiteSpace(wrapped.Payload))
                {
                    pending.Enqueue((wrapped.Payload!, wrapped.Shell ?? currentShell, depth + 1));
                }
            }
        }

        return result;
    }

    // Recognises a shell wrapper at the start of the command and returns its payload and the
    // shell tag for it. Returns default (all null) when the command is not a known wrapper.
    private static (string? Payload, string? Shell, string? Error) TryExtractWrappedPayload(string command)
    {
        var tokens = Tokenize(command);
        if (tokens.Length < 2)
        {
            return default;
        }

        var executable = Path.GetFileName(tokens[0]);
        if (string.IsNullOrWhiteSpace(executable))
        {
            return default;
        }

        if (executable.Equals("cmd", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("cmd.exe", StringComparison.OrdinalIgnoreCase))
        {
            for (var i = 1; i < tokens.Length; i++)
            {
                if (tokens[i].Equals("/c", StringComparison.OrdinalIgnoreCase) ||
                    tokens[i].Equals("/k", StringComparison.OrdinalIgnoreCase))
                {
                    var payload = string.Join(" ", tokens, i + 1, tokens.Length - i - 1).Trim();
                    return string.IsNullOrWhiteSpace(payload)
                        ? ("", "cmd", "Shell wrapper payload was empty")
                        : (payload, "cmd", null);
                }
            }
        }

        if (executable.Equals("powershell", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("powershell.exe", StringComparison.OrdinalIgnoreCase))
        {
            return ParsePowerShellPayload(tokens, "powershell");
        }

        if (executable.Equals("pwsh", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("pwsh.exe", StringComparison.OrdinalIgnoreCase))
        {
            return ParsePowerShellPayload(tokens, "pwsh");
        }

        if (executable.Equals("bash", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("bash.exe", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("sh", StringComparison.OrdinalIgnoreCase) ||
            executable.Equals("sh.exe", StringComparison.OrdinalIgnoreCase))
        {
            for (var i = 1; i < tokens.Length; i++)
            {
                if (tokens[i].Equals("-c", StringComparison.OrdinalIgnoreCase))
                {
                    var payload = string.Join(" ", tokens, i + 1, tokens.Length - i - 1).Trim();
                    return string.IsNullOrWhiteSpace(payload)
                        ? ("", "sh", "Shell wrapper payload was empty")
                        : (payload, "sh", null);
                }
            }
        }

        return default;
    }

    // Finds the first -Command / -EncodedCommand style option in a PowerShell argv and
    // returns its payload. Both "-flag value" and inline "-flag:value" / "-flag=value" are handled.
    private static (string? Payload, string? Shell, string? Error) ParsePowerShellPayload(string[] tokens, string shell)
    {
        for (var i = 1; i < tokens.Length; i++)
        {
            var option = tokens[i];

            // Check for inline separator form first: -flag:value or -flag=value
            var sepIdx = IndexOfFlagSeparator(option);
            if (sepIdx > 0)
            {
                var flagPart = option[..sepIdx];
                var valuePart = option[(sepIdx + 1)..];

                if (IsCommandFlag(flagPart))
                {
                    return string.IsNullOrWhiteSpace(valuePart)
                        ? ("", shell, "Shell wrapper payload was empty")
                        : (valuePart, shell, null);
                }

                if (IsEncodedCommandFlag(flagPart))
                {
                    return DecodeEncodedPayload(valuePart, shell);
                }
            }

            if (IsCommandFlag(option))
            {
                // Everything after the flag is the command text.
                var payload = string.Join(" ", tokens, i + 1, tokens.Length - i - 1).Trim();
                return string.IsNullOrWhiteSpace(payload)
                    ? ("", shell, "Shell wrapper payload was empty")
                    : (payload, shell, null);
            }

            if (IsEncodedCommandFlag(option))
            {
                var encoded = i + 1 < tokens.Length ? tokens[i + 1] : null;
                return DecodeEncodedPayload(encoded, shell);
            }
        }

        return default;
    }

    // Returns the index of the first ':' or '=' in a flag token (after the leading '-').
    private static int IndexOfFlagSeparator(string token)
    {
        for (var i = 1; i < token.Length; i++)
        {
            if (token[i] == ':' || token[i] == '=')
            {
                return i;
            }
        }

        return -1;
    }

    // Matches -Command and -c (documented PowerShell -Command aliases).
    private static bool IsCommandFlag(string flag) =>
        flag.Equals("-Command", StringComparison.OrdinalIgnoreCase) ||
        flag.Equals("-c", StringComparison.OrdinalIgnoreCase);

    // Matches -e/-ec aliases and all unique prefix abbreviations of -EncodedCommand.
    // Windows PowerShell accepts -e as EncodedCommand despite the apparent ambiguity with
    // -ExecutionPolicy, so the parser must fail closed and decode it.
    private static bool IsEncodedCommandFlag(string flag)
    {
        if (flag.Equals("-e", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        if (flag.Equals("-ec", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        const string fullFlag = "-encodedcommand";
        return flag.Length >= 3 && // minimum: -en
               flag.Length <= fullFlag.Length &&
               fullFlag.StartsWith(flag, StringComparison.OrdinalIgnoreCase);
    }

    // Decodes a Base64 / UTF-16LE -EncodedCommand argument. Missing, undecodable, or
    // blank-after-decoding input is reported as an error rather than ignored.
    private static (string? Payload, string? Shell, string? Error) DecodeEncodedPayload(string? encoded, string shell)
    {
        if (string.IsNullOrWhiteSpace(encoded))
        {
            return ("", shell, "Shell wrapper payload was empty");
        }

        try
        {
            var bytes = Convert.FromBase64String(encoded);
            var payload = Encoding.Unicode.GetString(bytes).Trim();
            return string.IsNullOrWhiteSpace(payload)
                ? ("", shell, "EncodedCommand decoded to an empty payload")
                : (payload, shell, null);
        }
        catch (FormatException)
        {
            return ("", shell, "EncodedCommand could not be decoded");
        }
    }

    // Splits a command line on ';', '&', '&&' and '||' that appear outside quotes.
    // A single '|' is not a separator. Quote characters are kept in the returned parts.
    private static List<string> SplitTopLevelCommands(string command)
    {
        var parts = new List<string>();
        var current = new StringBuilder();
        var inSingleQuotes = false;
        var inDoubleQuotes = false;

        for (var i = 0; i < command.Length; i++)
        {
            var c = command[i];

            if (c == '"' && !inSingleQuotes)
            {
                inDoubleQuotes = !inDoubleQuotes;
                current.Append(c);
                continue;
            }

            if (c == '\'' && !inDoubleQuotes)
            {
                inSingleQuotes = !inSingleQuotes;
                current.Append(c);
                continue;
            }

            if (!inSingleQuotes && !inDoubleQuotes)
            {
                if (c == ';' || c == '&')
                {
                    FlushCurrent(parts, current);
                    if (c == '&' && i + 1 < command.Length && command[i + 1] == '&')
                    {
                        i++;
                    }

                    continue;
                }

                if (c == '|' && i + 1 < command.Length && command[i + 1] == '|')
                {
                    FlushCurrent(parts, current);
                    i++;
                    continue;
                }
            }

            current.Append(c);
        }

        FlushCurrent(parts, current);
        return parts;
    }

    // Splits a command into whitespace-separated tokens, honouring single and double quotes
    // (the quote characters themselves are removed).
    //
    // Backslashes inside double quotes follow the Windows argv convention: a run of
    // backslashes is special only when it is immediately followed by a double quote
    // (2n backslashes + quote -> n backslashes and the quote toggles quoting;
    // 2n+1 backslashes + quote -> n backslashes and a literal quote). Every other backslash
    // is kept verbatim, so Windows paths such as "C:\Users\x" are no longer stripped of
    // their separators before the payload is evaluated.
    private static string[] Tokenize(string command)
    {
        var tokens = new List<string>();
        var current = new StringBuilder();
        var inSingleQuotes = false;
        var inDoubleQuotes = false;

        for (var i = 0; i < command.Length; i++)
        {
            var c = command[i];

            if (c == '\\' && inDoubleQuotes)
            {
                var runEnd = i;
                while (runEnd < command.Length && command[runEnd] == '\\')
                {
                    runEnd++;
                }

                var runLength = runEnd - i;
                if (runEnd < command.Length && command[runEnd] == '"')
                {
                    current.Append('\\', runLength / 2);
                    if (runLength % 2 == 1)
                    {
                        // Odd run: the last backslash escapes the quote.
                        current.Append('"');
                        i = runEnd;
                    }
                    else
                    {
                        // Even run: leave the quote for the toggle logic below.
                        i = runEnd - 1;
                    }
                }
                else
                {
                    current.Append('\\', runLength);
                    i = runEnd - 1;
                }

                continue;
            }

            if (c == '"' && !inSingleQuotes)
            {
                inDoubleQuotes = !inDoubleQuotes;
                continue;
            }

            if (c == '\'' && !inDoubleQuotes)
            {
                inSingleQuotes = !inSingleQuotes;
                continue;
            }

            if (!inSingleQuotes && !inDoubleQuotes && char.IsWhiteSpace(c))
            {
                FlushCurrent(tokens, current);
                continue;
            }

            current.Append(c);
        }

        FlushCurrent(tokens, current);
        return tokens.ToArray();
    }

    // Removes one pair of surrounding quotes when the value starts and ends with the same quote.
    private static string TrimMatchingQuotes(string value)
    {
        if (value.Length >= 2 &&
            ((value[0] == '"' && value[^1] == '"') || (value[0] == '\'' && value[^1] == '\'')))
        {
            return value[1..^1];
        }

        return value;
    }

    // Blank shell defaults to "powershell"; otherwise the name is lower-cased.
    private static string? NormalizeShell(string? shell) =>
        string.IsNullOrWhiteSpace(shell) ? "powershell" : shell.ToLowerInvariant();

    // Moves the accumulated text (if any) into parts and resets the builder.
    private static void FlushCurrent(List<string> parts, StringBuilder current)
    {
        if (current.Length == 0)
        {
            return;
        }

        parts.Add(current.ToString());
        current.Clear();
    }
}
|
||||
@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Buffers;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
@ -6,6 +7,9 @@ public static class GatewayUrlHelper
|
||||
{
|
||||
public const string ValidationMessage = "Gateway URL must be a valid URL (ws://, wss://, http://, or https://).";
|
||||
|
||||
private static readonly SearchValues<char> s_authorityTerminators =
|
||||
SearchValues.Create("/?#");
|
||||
|
||||
public static bool IsValidGatewayUrl(string? gatewayUrl) =>
|
||||
TryNormalizeWebSocketUrl(gatewayUrl, out _);
|
||||
|
||||
@ -59,8 +63,8 @@ public static class GatewayUrlHelper
|
||||
}
|
||||
}
|
||||
|
||||
var username = credentials.Substring(0, separatorIndex);
|
||||
var password = credentials.Substring(separatorIndex + 1);
|
||||
var username = credentials[..separatorIndex];
|
||||
var password = credentials[(separatorIndex + 1)..];
|
||||
|
||||
try
|
||||
{
|
||||
@ -113,7 +117,7 @@ public static class GatewayUrlHelper
|
||||
return false;
|
||||
}
|
||||
|
||||
var remainder = trimmed.Substring(schemeSeparator);
|
||||
var remainder = trimmed[schemeSeparator..];
|
||||
if (uri.Scheme.Equals("http", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
candidate = "ws" + remainder;
|
||||
@ -141,11 +145,8 @@ public static class GatewayUrlHelper
|
||||
}
|
||||
|
||||
var authorityStart = schemeSeparator + 3;
|
||||
var authorityEnd = url.IndexOfAny(new[] { '/', '?', '#' }, authorityStart);
|
||||
if (authorityEnd < 0)
|
||||
{
|
||||
authorityEnd = url.Length;
|
||||
}
|
||||
var relativeEnd = url.AsSpan(authorityStart).IndexOfAny(s_authorityTerminators);
|
||||
var authorityEnd = relativeEnd < 0 ? url.Length : authorityStart + relativeEnd;
|
||||
|
||||
var atIndex = url.IndexOf('@', authorityStart);
|
||||
if (atIndex < 0 || atIndex >= authorityEnd)
|
||||
@ -153,7 +154,7 @@ public static class GatewayUrlHelper
|
||||
return url;
|
||||
}
|
||||
|
||||
return url.Substring(0, authorityStart) + url.Substring(atIndex + 1);
|
||||
return string.Concat(url.AsSpan(0, authorityStart), url.AsSpan(atIndex + 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
224
src/OpenClaw.Shared/HttpUrlRiskEvaluator.cs
Normal file
224
src/OpenClaw.Shared/HttpUrlRiskEvaluator.cs
Normal file
@ -0,0 +1,224 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
public enum HttpUrlSecurityZone
|
||||
{
|
||||
Unknown = -1,
|
||||
LocalMachine = 0,
|
||||
Intranet = 1,
|
||||
Trusted = 2,
|
||||
Internet = 3,
|
||||
Restricted = 4,
|
||||
}
|
||||
|
||||
public sealed record HttpUrlRiskProfile(
|
||||
string CanonicalUrl,
|
||||
string CanonicalOrigin,
|
||||
string HostKey,
|
||||
HttpUrlSecurityZone Zone,
|
||||
bool RequiresConfirmation,
|
||||
IReadOnlyList<string> Reasons);
|
||||
|
||||
/// <summary>
|
||||
/// Centralized risk classifier for agent-supplied HTTP URLs. Callers should run
|
||||
/// <see cref="HttpUrlValidator"/> first; this type decides whether an otherwise
|
||||
/// valid URL needs user confirmation before browser navigation or media handoff.
|
||||
/// </summary>
|
||||
public static class HttpUrlRiskEvaluator
|
||||
{
|
||||
public static HttpUrlRiskProfile Evaluate(string canonicalUrl)
|
||||
{
|
||||
if (!Uri.TryCreate(canonicalUrl, UriKind.Absolute, out var uri))
|
||||
throw new ArgumentException("URL must be an absolute URI", nameof(canonicalUrl));
|
||||
|
||||
var reasons = new List<string>();
|
||||
var host = uri.Host;
|
||||
|
||||
if (!string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
|
||||
reasons.Add("URL does not use HTTPS");
|
||||
|
||||
// Homograph defense: a Unicode hostname that round-trips to a different
|
||||
// ASCII (Punycode) form is suspicious — `аpple.com` (Cyrillic 'а') and
|
||||
// `apple.com` are visually identical but resolve differently. Always
|
||||
// surface the mismatch as a Reason so the prompt fires for IDN hosts.
|
||||
if (!string.Equals(uri.Host, uri.IdnHost, StringComparison.Ordinal))
|
||||
reasons.Add($"Hostname is internationalized; punycode form is '{uri.IdnHost}'");
|
||||
|
||||
if (string.Equals(host, "localhost", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
reasons.Add("Host is localhost");
|
||||
}
|
||||
else if (IPAddress.TryParse(host, out var ip))
|
||||
{
|
||||
reasons.Add("Host is an IP literal");
|
||||
AddAddressRiskReasons(ip, reasons);
|
||||
}
|
||||
else if (!host.Contains('.', StringComparison.Ordinal))
|
||||
{
|
||||
reasons.Add("Host has no dot and may resolve on the local intranet");
|
||||
}
|
||||
|
||||
var zone = MapUrlToZone(canonicalUrl);
|
||||
switch (zone)
|
||||
{
|
||||
case HttpUrlSecurityZone.LocalMachine:
|
||||
reasons.Add("Windows classifies this URL as Local Machine zone");
|
||||
break;
|
||||
case HttpUrlSecurityZone.Intranet:
|
||||
reasons.Add("Windows classifies this URL as Intranet zone");
|
||||
break;
|
||||
case HttpUrlSecurityZone.Restricted:
|
||||
reasons.Add("Windows classifies this URL as Restricted zone");
|
||||
break;
|
||||
}
|
||||
|
||||
var distinctReasons = reasons
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.ToArray();
|
||||
|
||||
return new HttpUrlRiskProfile(
|
||||
uri.AbsoluteUri,
|
||||
GetCanonicalOrigin(uri),
|
||||
uri.Authority.ToLowerInvariant(),
|
||||
zone,
|
||||
distinctReasons.Length > 0,
|
||||
distinctReasons);
|
||||
}
|
||||
|
||||
public static bool IsPublicAddress(IPAddress ip)
|
||||
{
|
||||
if (IPAddress.IsLoopback(ip)) return false;
|
||||
if (ip.IsIPv4MappedToIPv6) return IsPublicAddress(ip.MapToIPv4());
|
||||
|
||||
if (ip.AddressFamily == AddressFamily.InterNetwork)
|
||||
{
|
||||
var b = ip.GetAddressBytes();
|
||||
if (b[0] == 0) return false;
|
||||
if (b[0] == 10) return false;
|
||||
if (b[0] == 100 && (b[1] & 0xC0) == 64) return false;
|
||||
if (b[0] == 127) return false;
|
||||
if (b[0] == 169 && b[1] == 254) return false;
|
||||
if (b[0] == 172 && b[1] >= 16 && b[1] <= 31) return false;
|
||||
if (b[0] == 192 && b[1] == 168) return false;
|
||||
if (b[0] >= 224) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ip.AddressFamily == AddressFamily.InterNetworkV6)
|
||||
{
|
||||
// Unspecified (::) — never a routable destination.
|
||||
if (ip.Equals(IPAddress.IPv6None)) return false;
|
||||
if (ip.IsIPv6LinkLocal) return false;
|
||||
if (ip.IsIPv6SiteLocal) return false;
|
||||
if (ip.IsIPv6Multicast) return false;
|
||||
var b = ip.GetAddressBytes();
|
||||
// Unique-local fc00::/7 (existing).
|
||||
if ((b[0] & 0xFE) == 0xFC) return false;
|
||||
// ::ffff:0:0/96 — IPv4-mapped (caught above by IsIPv4MappedToIPv6,
|
||||
// but defensively re-check).
|
||||
if (b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] == 0 &&
|
||||
b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0 &&
|
||||
b[8] == 0 && b[9] == 0 && b[10] == 0xFF && b[11] == 0xFF)
|
||||
{
|
||||
var mapped = new IPAddress(new[] { b[12], b[13], b[14], b[15] });
|
||||
return IsPublicAddress(mapped);
|
||||
}
|
||||
// ::/96 IPv4-compatible (deprecated; treat as non-public — never legit
|
||||
// for an agent-supplied URL).
|
||||
if (b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] == 0 &&
|
||||
b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0 &&
|
||||
b[8] == 0 && b[9] == 0 && b[10] == 0 && b[11] == 0)
|
||||
return false;
|
||||
// 2001:db8::/32 — documentation prefix (RFC 3849).
|
||||
if (b[0] == 0x20 && b[1] == 0x01 && b[2] == 0x0D && b[3] == 0xB8) return false;
|
||||
// 2001:0000::/32 — Teredo (relay tunneling; not a normal target).
|
||||
if (b[0] == 0x20 && b[1] == 0x01 && b[2] == 0x00 && b[3] == 0x00) return false;
|
||||
// 100::/64 — discard-only address block (RFC 6666).
|
||||
if (b[0] == 0x01 && b[1] == 0x00 &&
|
||||
b[2] == 0 && b[3] == 0 && b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0)
|
||||
return false;
|
||||
// 2002::/16 — 6to4. Block: payload destination is unverifiable.
|
||||
if (b[0] == 0x20 && b[1] == 0x02) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static void AddAddressRiskReasons(IPAddress ip, List<string> reasons)
|
||||
{
|
||||
if (IPAddress.IsLoopback(ip))
|
||||
{
|
||||
reasons.Add("Address is loopback");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!IsPublicAddress(ip))
|
||||
reasons.Add("Address is private, link-local, multicast, or reserved");
|
||||
}
|
||||
|
||||
private static string GetCanonicalOrigin(Uri uri)
|
||||
{
|
||||
var origin = uri.GetLeftPart(UriPartial.Authority);
|
||||
return origin.EndsWith("/", StringComparison.Ordinal) ? origin : origin + "/";
|
||||
}
|
||||
|
||||
/// <summary>
/// Asks the Windows Internet Security Manager COM object which security
/// zone <paramref name="url"/> maps to.
/// </summary>
/// <param name="url">The URL passed verbatim to the COM <c>MapUrlToZone</c> call.</param>
/// <returns>
/// The mapped zone when the call succeeds and the value is a defined
/// <c>HttpUrlSecurityZone</c> member; <c>HttpUrlSecurityZone.Unknown</c> on
/// non-Windows platforms, when the COM class is unavailable, when the call
/// returns a non-zero HRESULT, or when anything throws.
/// </returns>
private static HttpUrlSecurityZone MapUrlToZone(string url)
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return HttpUrlSecurityZone.Unknown;

    // Track the raw activated object rather than the cast interface: if the
    // cast to IInternetSecurityManager fails we still own a COM wrapper that
    // must be released in the finally block.
    object? instance = null;
    try
    {
        var type = Type.GetTypeFromCLSID(
            new Guid("7b8a2d94-0ac9-11d1-896c-00c04fb6bfc4"),
            throwOnError: false);
        if (type == null)
            return HttpUrlSecurityZone.Unknown;

        instance = Activator.CreateInstance(type);
        if (instance is not IInternetSecurityManager manager)
            return HttpUrlSecurityZone.Unknown;

        // PreserveSig: a non-zero HRESULT is reported as "Unknown" instead of throwing.
        var hr = manager.MapUrlToZone(url, out var zone, 0);
        if (hr != 0)
            return HttpUrlSecurityZone.Unknown;

        return Enum.IsDefined(typeof(HttpUrlSecurityZone), zone)
            ? (HttpUrlSecurityZone)zone
            : HttpUrlSecurityZone.Unknown;
    }
    catch
    {
        // Zone lookup is advisory; any COM/interop failure degrades to Unknown.
        return HttpUrlSecurityZone.Unknown;
    }
    finally
    {
        if (instance != null)
        {
            // Best-effort release; a failure here must not mask the result.
            try { Marshal.ReleaseComObject(instance); } catch { }
        }
    }
}
|
||||
|
||||
/// <summary>
/// COM interop declaration for the IUnknown-based interface with IID
/// 79eac9ee-baf9-11ce-8c82-00aa004ba90b. Members of a <c>[ComImport]</c>
/// interface are bound by declaration order, so do not reorder or insert
/// members. Every method is <c>[PreserveSig]</c> and therefore returns the
/// raw HRESULT instead of throwing.
/// NOTE(review): the native interface presumably defines further methods
/// after MapUrlToZone that are intentionally omitted here — confirm.
/// </summary>
[ComImport]
|
||||
[Guid("79eac9ee-baf9-11ce-8c82-00aa004ba90b")]
|
||||
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
|
||||
private interface IInternetSecurityManager
|
||||
{
|
||||
// Declared to keep the method order intact; not called from the code visible here.
[PreserveSig]
|
||||
int SetSecuritySite(IntPtr site);
|
||||
|
||||
// Declared to keep the method order intact; not called from the code visible here.
[PreserveSig]
|
||||
int GetSecuritySite(out IntPtr site);
|
||||
|
||||
// Maps a URL (marshalled as a wide string) to a numeric zone; returns an HRESULT.
[PreserveSig]
|
||||
int MapUrlToZone(
|
||||
[MarshalAs(UnmanagedType.LPWStr)] string url,
|
||||
out int zone,
|
||||
int flags);
|
||||
}
|
||||
}
|
||||
67
src/OpenClaw.Shared/HttpUrlValidator.cs
Normal file
67
src/OpenClaw.Shared/HttpUrlValidator.cs
Normal file
@ -0,0 +1,67 @@
|
||||
using System;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
|
||||
/// Strict validator for agent-supplied URLs that the node will hand off to a
|
||||
/// browser via shell-execute. Defense-in-depth around <c>canvas.navigate</c>:
|
||||
/// the gateway should already only emit http(s), but treating that as
|
||||
/// authoritative would let a misbehaving / compromised agent ask the node to
|
||||
/// shell-execute <c>file:</c>, <c>javascript:</c>, app-protocol URIs, or
|
||||
/// credential-stuffed URLs that visually masquerade as legitimate.
|
||||
/// </summary>
|
||||
public static class HttpUrlValidator
{
    /// <summary>
    /// Validates <paramref name="raw"/> as an absolute http/https URL that
    /// has a host and carries no userinfo component.
    /// </summary>
    /// <param name="raw">Candidate URL; surrounding whitespace is ignored.</param>
    /// <param name="canonical">
    /// On success, the re-serialized <see cref="Uri.AbsoluteUri"/> — pass this
    /// to the OS rather than the raw input. Null on failure.
    /// </param>
    /// <param name="error">On failure, the rejection reason. Null on success.</param>
    /// <returns><c>true</c> when the URL is accepted; otherwise <c>false</c>.</returns>
    public static bool TryParse(string? raw, out string? canonical, out string? error)
    {
        error = FindRejection(raw, out var parsed);
        canonical = error is null ? parsed!.AbsoluteUri : null;
        return error is null;
    }

    /// <summary>
    /// Runs the validation rules in order and returns the first rejection
    /// reason, or null when <paramref name="raw"/> passes every rule.
    /// </summary>
    private static string? FindRejection(string? raw, out Uri? parsed)
    {
        parsed = null;

        if (string.IsNullOrWhiteSpace(raw))
        {
            return "url is empty";
        }

        if (!Uri.TryCreate(raw.Trim(), UriKind.Absolute, out parsed))
        {
            return "url is not an absolute URI";
        }

        // Uri already lowercases the scheme; the case-insensitive comparison
        // states the intent explicitly and does not rely on that detail.
        var scheme = parsed.Scheme;
        var isHttp = string.Equals(scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase);
        var isHttps = string.Equals(scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
        if (!(isHttp || isHttps))
        {
            return $"scheme '{scheme}' is not allowed (only http/https)";
        }

        if (string.IsNullOrEmpty(parsed.Host))
        {
            return "url has no host";
        }

        // Userinfo (https://attacker@evil.com) is valid HTTP but a classic
        // phishing shape: the part before '@' reads like the host.
        if (!string.IsNullOrEmpty(parsed.UserInfo))
        {
            return "url contains userinfo (user:password@) which is not allowed";
        }

        return null;
    }
}
|
||||
27
src/OpenClaw.Shared/IDeviceStatusProvider.cs
Normal file
27
src/OpenClaw.Shared/IDeviceStatusProvider.cs
Normal file
@ -0,0 +1,27 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
|
||||
/// Provider interface for platform-specific device status data collection.
|
||||
/// Each method returns an object that will be serialized to JSON.
|
||||
/// Implementations should handle their own error cases gracefully.
|
||||
/// </summary>
|
||||
public interface IDeviceStatusProvider : IDisposable
|
||||
{
|
||||
/// <summary>OS version, architecture, machine name, uptime.</summary>
/// <returns>An object intended for JSON serialization; its concrete type is left to the implementation.</returns>
|
||||
object GetOsInfo();
|
||||
|
||||
/// <summary>CPU name, logical processor count, usage percent (may be null during warm-up).</summary>
/// <returns>A task producing an object intended for JSON serialization.</returns>
|
||||
Task<object> GetCpuInfoAsync();
|
||||
|
||||
/// <summary>Total/available memory in bytes and usage percent.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetMemoryInfo();
|
||||
|
||||
/// <summary>Fixed drive info: name, label, total/free bytes, usage percent, format.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetDiskInfo();
|
||||
|
||||
/// <summary>Battery presence, charge level, charging state, estimated time remaining.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetBatteryInfo();
|
||||
}
|
||||
@ -1,6 +1,5 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
@ -165,21 +164,23 @@ public class LocalCommandRunner : ICommandRunner
|
||||
|
||||
private static (string fileName, string arguments) BuildProcessArgs(CommandRequest request)
|
||||
{
|
||||
var shell = (request.Shell ?? "powershell").ToLowerInvariant();
|
||||
var shell = request.Shell ?? "powershell";
|
||||
var command = request.Command;
|
||||
var isCmd = shell == "cmd";
|
||||
var isCmd = shell.Equals("cmd", StringComparison.OrdinalIgnoreCase);
|
||||
|
||||
if (request.Args is { Length: > 0 })
|
||||
{
|
||||
command = command + " " + string.Join(" ", request.Args.Select(a => ShellQuoting.QuoteForShell(a, isCmd)));
|
||||
var quoted = new string[request.Args.Length];
|
||||
for (var i = 0; i < request.Args.Length; i++)
|
||||
quoted[i] = ShellQuoting.QuoteForShell(request.Args[i], isCmd);
|
||||
command = command + " " + string.Join(" ", quoted);
|
||||
}
|
||||
|
||||
return shell switch
|
||||
{
|
||||
"cmd" => ("cmd.exe", $"/C {command}"),
|
||||
"pwsh" => ("pwsh.exe", $"-NoProfile -NonInteractive -Command {command}"),
|
||||
_ => ("powershell.exe", $"-NoProfile -NonInteractive -Command {command}")
|
||||
};
|
||||
if (isCmd)
|
||||
return ("cmd.exe", $"/C {command}");
|
||||
if (shell.Equals("pwsh", StringComparison.OrdinalIgnoreCase))
|
||||
return ("pwsh.exe", $"-NoProfile -NonInteractive -Command {command}");
|
||||
return ("powershell.exe", $"-NoProfile -NonInteractive -Command {command}");
|
||||
}
|
||||
|
||||
private void KillProcess(Process process)
|
||||
|
||||
25
src/OpenClaw.Shared/LocalGatewayUrlClassifier.cs
Normal file
25
src/OpenClaw.Shared/LocalGatewayUrlClassifier.cs
Normal file
@ -0,0 +1,25 @@
|
||||
using System;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
|
||||
/// Shared literal-host classifier for gateway URLs that point at the local machine.
|
||||
/// </summary>
|
||||
public static class LocalGatewayUrlClassifier
{
    /// <summary>
    /// Returns <c>true</c> when <paramref name="url"/> is an absolute URI whose
    /// host is literally <c>localhost</c>, <c>127.0.0.1</c>, or the IPv6
    /// loopback <c>::1</c> (with or without brackets). This is a literal
    /// match only: no DNS resolution is performed and other loopback
    /// addresses such as <c>127.0.0.2</c> are not treated as local.
    /// </summary>
    /// <param name="url">Gateway URL to classify; null, blank, or unparsable input yields <c>false</c>.</param>
    public static bool IsLocalGatewayUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url)) return false;

        // TryCreate instead of new Uri + catch-all: malformed input is an
        // expected case here, not an exceptional one.
        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) return false;

        var host = uri.Host.ToLowerInvariant();
        return host is "localhost" or "127.0.0.1" or "::1" or "[::1]";
    }
}
|
||||
284
src/OpenClaw.Shared/Mcp/McpAuthToken.cs
Normal file
284
src/OpenClaw.Shared/Mcp/McpAuthToken.cs
Normal file
@ -0,0 +1,284 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.Versioning;
|
||||
using System.Security.AccessControl;
|
||||
using System.Security.Cryptography;
|
||||
using System.Security.Principal;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
|
||||
/// Manages the MCP server's bearer token.
|
||||
///
|
||||
/// The token lives next to the rest of the tray's settings, at
|
||||
/// <c>%APPDATA%\OpenClawTray\mcp-token.txt</c> (the exact path is composed by
|
||||
/// the tray from <c>SettingsManager.SettingsDirectoryPath</c> and surfaced as
|
||||
/// <c>NodeService.McpTokenPath</c> — that's the source of truth, not anything
|
||||
/// in this file). Co-locating with settings means the test-suite override
|
||||
/// <c>OPENCLAW_TRAY_DATA_DIR</c> isolates the token file too.
|
||||
///
|
||||
/// The token is **created lazily on first MCP server start** (i.e. the first
|
||||
/// time the user enables Local MCP Server in Settings — until then the file
|
||||
/// does not exist) and then **persists across tray restarts**. Local CLIs and
|
||||
/// per-user agent registrations read the file and send the contents on every
|
||||
/// request as <c>Authorization: Bearer <contents></c>.
|
||||
///
|
||||
/// Defense in depth: the file inherits the parent directory's ACL — by default
|
||||
/// only the current user (and SYSTEM/Administrators) can read it; the listener
|
||||
/// is bound to loopback so the endpoint is invisible to other machines; and
|
||||
/// Origin/Host checks block browser cross-origin attacks. The bearer is the
|
||||
/// last line of defense against an untrusted local process on the same box.
|
||||
/// </summary>
|
||||
public static class McpAuthToken
{
    private const string FileName = "mcp-token.txt";

    // UTF-8 without a byte-order mark. Passing Encoding.UTF8 to
    // File.WriteAllText prepends EF BB BF to the file; any reader that does
    // not strip a BOM would then send those bytes as part of the bearer value.
    private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false);

    /// <summary>
    /// Fallback path used only when a caller doesn't supply one. The tray itself
    /// passes a path computed from <c>SettingsManager.SettingsDirectoryPath</c>
    /// (exposed as <c>NodeService.McpTokenPath</c>) so this constant is **not**
    /// the live location for OpenClaw Tray installations — it's only a default
    /// for non-tray consumers (CLIs, tests) that don't want to compute one.
    /// </summary>
    public static string DefaultPath
    {
        get
        {
            var root = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
            return Path.Combine(root, "OpenClaw", FileName);
        }
    }

    /// <summary>
    /// Load the token from <see cref="DefaultPath"/>, creating a fresh random
    /// one if the file does not exist. Returns the token string.
    /// </summary>
    public static string LoadOrCreate() => LoadOrCreate(DefaultPath);

    /// <summary>
    /// Load the token stored at <paramref name="path"/>, or create and persist
    /// a fresh one when the file is missing or empty.
    /// </summary>
    /// <param name="path">Token file location; must not be null or blank.</param>
    /// <returns>The existing (trimmed) token, or the newly generated one.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or blank.</exception>
    /// <exception cref="IOException">
    /// The file exists but could not be read. The token is deliberately not
    /// regenerated in that case.
    /// </exception>
    public static string LoadOrCreate(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
            throw new ArgumentException("Token path is required", nameof(path));

        // A transient lock or AV scan must not *rotate the token* and break
        // every configured MCP client. Distinguish missing (regenerate) from
        // unreadable (throw — visible in startup logs).
        if (File.Exists(path))
        {
            string existing;
            try
            {
                existing = File.ReadAllText(path).Trim();
            }
            catch (Exception ex)
            {
                throw new IOException(
                    $"MCP token file at '{path}' exists but could not be read: {ex.Message}. " +
                    "Refusing to regenerate (would invalidate all configured clients).", ex);
            }
            if (!string.IsNullOrEmpty(existing)) return existing;
            // Empty file: treat as missing. The atomic write below replaces it.
        }

        return WriteNewToken(path);
    }

    /// <summary>
    /// Unconditionally replace the token at <paramref name="path"/> with a
    /// freshly generated one.
    /// </summary>
    /// <param name="path">Token file location; must not be null or blank.</param>
    /// <returns>The new token.</returns>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or blank.</exception>
    public static string Reset(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
            throw new ArgumentException("Token path is required", nameof(path));

        return WriteNewToken(path);
    }

    /// <summary>Read the token without creating a new one. Returns null when missing.</summary>
    /// <remarks>Also returns null when the file is empty or cannot be read.</remarks>
    public static string? TryLoad(string? path = null)
    {
        path ??= DefaultPath;
        try
        {
            if (!File.Exists(path)) return null;
            var token = File.ReadAllText(path).Trim();
            return string.IsNullOrEmpty(token) ? null : token;
        }
        catch { return null; }
    }

    /// <summary>
    /// Verify that the token file at <paramref name="path"/> is owned by the
    /// current user and not readable by anyone outside (Owner, SYSTEM,
    /// Administrators). Returns null if the file looks fine; returns a
    /// human-readable warning otherwise so callers can log/toast at startup.
    /// On non-Windows or when the file does not exist, returns null.
    /// </summary>
    public static string? VerifyAcl(string path)
    {
        if (string.IsNullOrEmpty(path) || !File.Exists(path)) return null;
        if (!OperatingSystem.IsWindows()) return null;
        return VerifyFileAclWindows(path);
    }

    /// <summary>
    /// Best-effort: lock the supplied directory's ACL to current user + SYSTEM
    /// + Administrators with inheritance disabled. No-op on non-Windows.
    /// Callers should call this when the tray's data directory is created so
    /// other locally-installed apps under the same user can't read the token
    /// (or anything else we drop alongside it).
    /// </summary>
    public static void TryRestrictDataDirectoryAcl(string dir)
    {
        if (string.IsNullOrEmpty(dir)) return;
        if (!OperatingSystem.IsWindows()) return;
        try { RestrictDirectoryAclWindows(dir); }
        catch { /* best-effort; acl restriction is defense-in-depth, not load-bearing */ }
    }

    /// <summary>
    /// Best-effort: lock a single file's ACL to current user + SYSTEM +
    /// Administrators with inheritance disabled. No-op on non-Windows;
    /// failures are swallowed for the same reason as the directory variant.
    /// </summary>
    public static void TryRestrictSensitiveFileAcl(string path)
    {
        if (string.IsNullOrEmpty(path)) return;
        if (!OperatingSystem.IsWindows()) return;
        try { RestrictFileAclWindows(path); }
        catch { /* best-effort; acl restriction is defense-in-depth, not load-bearing */ }
    }

    private static void TryRestrictDirectoryAcl(string dir) => TryRestrictDataDirectoryAcl(dir);

    /// <summary>
    /// Generate a token and persist it at <paramref name="path"/> atomically:
    /// stage to a sibling temp file, lock its ACL, then rename over the
    /// target. Without the staging step, a power-loss / process-kill mid-write
    /// could leave a zero-byte token file which the next LoadOrCreate would
    /// treat as "missing" and overwrite — silently rotating the token.
    /// </summary>
    private static string WriteNewToken(string path)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir))
        {
            Directory.CreateDirectory(dir);
            TryRestrictDirectoryAcl(dir);
        }

        var token = Generate();
        var tempPath = Path.Combine(
            string.IsNullOrEmpty(dir) ? Environment.CurrentDirectory : dir,
            $".{Path.GetFileName(path)}.{Guid.NewGuid():N}.tmp");
        try
        {
            File.WriteAllText(tempPath, token, Utf8NoBom);
            TryRestrictSensitiveFileAcl(tempPath);
            File.Move(tempPath, path, overwrite: true);
        }
        catch
        {
            // Do not leave a stray copy of the secret behind on failure.
            try { if (File.Exists(tempPath)) File.Delete(tempPath); } catch { }
            throw;
        }

        // Re-apply defensively in case the rename did not carry the staged
        // file's ACL over to the final path.
        TryRestrictSensitiveFileAcl(path);
        return token;
    }

    [SupportedOSPlatform("windows")]
    private static void RestrictFileAclWindows(string path)
    {
        var info = new FileInfo(path);
        var sec = new FileSecurity();
        sec.SetAccessRuleProtection(isProtected: true, preserveInheritance: false);
        using var identity = WindowsIdentity.GetCurrent();
        var owner = identity.User;
        if (owner == null) return;
        sec.SetOwner(owner);
        sec.AddAccessRule(new FileSystemAccessRule(owner,
            FileSystemRights.FullControl, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null),
            FileSystemRights.FullControl, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null),
            FileSystemRights.FullControl, AccessControlType.Allow));
        info.SetAccessControl(sec);
    }

    [SupportedOSPlatform("windows")]
    private static void RestrictDirectoryAclWindows(string dir)
    {
        var info = new DirectoryInfo(dir);
        var sec = new DirectorySecurity();
        sec.SetAccessRuleProtection(isProtected: true, preserveInheritance: false);
        using var identity = WindowsIdentity.GetCurrent();
        var owner = identity.User;
        if (owner == null) return;
        sec.SetOwner(owner);
        // Rules flow down to files and sub-directories created later.
        var inherit = InheritanceFlags.ContainerInherit | InheritanceFlags.ObjectInherit;
        sec.AddAccessRule(new FileSystemAccessRule(owner,
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null),
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null),
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        info.SetAccessControl(sec);
    }

    [SupportedOSPlatform("windows")]
    private static string? VerifyFileAclWindows(string path)
    {
        try
        {
            var info = new FileInfo(path);
            var sec = info.GetAccessControl();
            var ownerSid = sec.GetOwner(typeof(SecurityIdentifier)) as SecurityIdentifier;
            using var identity = WindowsIdentity.GetCurrent();
            var current = identity.User;
            if (current == null) return null;
            if (ownerSid == null || !ownerSid.Equals(current))
            {
                return $"MCP token file owner is {ownerSid?.Value ?? "<unknown>"}; expected current user {current.Value}. Treat the token as compromised and reset it.";
            }
            // Walk the ACL — anything granting read rights to a principal
            // outside {current user, SYSTEM, Administrators} is broader than
            // expected.
            var system = new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null);
            var admins = new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null);
            var rules = sec.GetAccessRules(true, true, typeof(SecurityIdentifier));
            foreach (FileSystemAccessRule rule in rules)
            {
                if (rule.AccessControlType != AccessControlType.Allow) continue;
                // NOTE(review): the mask includes FullControl, so in practice any
                // Allow rule for an unexpected principal is reported here, not
                // only read grants — confirm that this breadth is intended.
                if ((rule.FileSystemRights & (FileSystemRights.Read | FileSystemRights.ReadAndExecute | FileSystemRights.ReadData | FileSystemRights.FullControl | FileSystemRights.Modify)) == 0) continue;
                if (rule.IdentityReference is SecurityIdentifier sid &&
                    (sid.Equals(current) || sid.Equals(system) || sid.Equals(admins)))
                    continue;
                return $"MCP token file ACL grants read access to {rule.IdentityReference.Value}, broader than expected. Reset the token if this is unexpected.";
            }
            return null;
        }
        catch (Exception ex)
        {
            return $"MCP token ACL inspection failed: {ex.Message}";
        }
    }

    /// <summary>32 bytes (256 bits) of CSPRNG → base64url → 43 ASCII chars (no padding).</summary>
    private static string Generate()
    {
        Span<byte> raw = stackalloc byte[32];
        RandomNumberGenerator.Fill(raw);
        return Convert.ToBase64String(raw)
            .Replace('+', '-')
            .Replace('/', '_')
            .TrimEnd('=');
    }
}
|
||||
479
src/OpenClaw.Shared/Mcp/McpHttpServer.cs
Normal file
479
src/OpenClaw.Shared/Mcp/McpHttpServer.cs
Normal file
@ -0,0 +1,479 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
|
||||
/// Localhost-only HTTP transport for the MCP server.
|
||||
///
|
||||
/// Security model — three layers:
|
||||
/// 1. Loopback bind (127.0.0.1). Unreachable from another machine, regardless
|
||||
/// of firewall configuration.
|
||||
/// 2. Defensive IsLoopback check on every request.
|
||||
/// 3. Browser/CSRF gate: a browser tab fetching http://127.0.0.1:8765/ is
|
||||
/// *also* on the loopback interface, so loopback alone does not protect
|
||||
/// against a malicious page. We reject any request that:
|
||||
/// - presents an Origin header (real MCP clients do not send Origin),
|
||||
/// - has a Host header that is not 127.0.0.1/localhost,
|
||||
/// - is a POST with Content-Type other than application/json.
|
||||
/// Together these force a CORS preflight from a browser, which we never
|
||||
/// satisfy (no Access-Control-Allow-Origin), so the cross-origin call
|
||||
/// fails before reaching capability code.
|
||||
///
|
||||
/// Bearer-token auth in front of request dispatch. Required on every request
|
||||
/// when constructed with a non-null token (the tray always passes one — see
|
||||
/// <c>NodeService.McpTokenPath</c> / <c>McpAuthToken.LoadOrCreate</c>; legacy
|
||||
/// callers that pass null disable the check, kept for in-process tests). The
|
||||
/// token defends against untrusted local processes that could otherwise reach
|
||||
/// the predictable 127.0.0.1:port endpoint — a process running as the same
|
||||
/// user on the same box can read the token file and would defeat this layer,
|
||||
/// but anything sandboxed away from <c>%APPDATA%\OpenClawTray\</c> cannot.
|
||||
///
|
||||
/// Stability defenses (CR-003/CR-005):
|
||||
/// - Per-request hard deadline (RequestTimeoutMs) bounds body-read and
|
||||
/// bridge dispatch so a slow or hung client cannot pin a handler slot
|
||||
/// forever.
|
||||
/// - Active handler tasks are tracked so Stop/Dispose can drain in-flight
|
||||
/// work before tearing down the semaphore and capability services.
|
||||
/// </summary>
|
||||
public sealed class McpHttpServer : IDisposable
|
||||
{
|
||||
private const long MaxRequestBodyBytes = 4L * 1024 * 1024; // 4 MiB
|
||||
// 16 leaves headroom for parallel tool callers (e.g. an editor + Claude
|
||||
// Desktop + a CLI script) without making each connection cheap enough to
|
||||
// become a DoS lever — request size cap + per-handler timeout still bound
|
||||
// memory. Bumped from 8 after queue-stall reports under multi-IDE use.
|
||||
private const int MaxConcurrentHandlers = 16;
|
||||
// Sized to cover the longest legitimate capability: screen.record up to
|
||||
// 300s plus encoding + serialization. Earlier 90s deadline silently
|
||||
// aborted valid recording requests while the OS capture pipeline kept running
|
||||
// unobserved (unified review H10). Cancellation now flows through the
|
||||
// capability via INodeCapability.ExecuteAsync(NodeInvokeRequest, CT) so
|
||||
// tools that opt in actually stop the underlying work.
|
||||
private const int RequestTimeoutMs = 360_000;
|
||||
// How long Dispose waits for in-flight handlers to drain before forcing
|
||||
// tear-down. Past this point handlers may observe disposed services.
|
||||
private static readonly TimeSpan DrainTimeout = TimeSpan.FromSeconds(5);
|
||||
|
||||
private readonly McpToolBridge _bridge;
|
||||
private readonly int _port;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly HttpListener _listener;
|
||||
/// <summary>
|
||||
/// Required bearer token for HTTP requests. Empty/null disables auth (the
|
||||
/// pre-auth contract — kept so existing dev configs keep working). When set,
|
||||
/// every request must carry <c>Authorization: Bearer <token></c>.
|
||||
/// </summary>
|
||||
private string? _authToken;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
private readonly SemaphoreSlim _handlerLimiter = new(MaxConcurrentHandlers, MaxConcurrentHandlers);
|
||||
private readonly object _activeLock = new();
|
||||
private readonly HashSet<Task> _activeHandlers = new();
|
||||
private Task? _acceptLoop;
|
||||
private int _disposed;
|
||||
private int _stopping;
|
||||
|
||||
public int Port => _port;
|
||||
public string Endpoint => $"http://127.0.0.1:{_port}/";
|
||||
|
||||
/// <summary>
/// Creates a server bound to <c>http://127.0.0.1:{port}/</c>. The listener
/// is not started here; call <c>Start</c>.
/// </summary>
/// <param name="bridge">Dispatcher for request bodies; must not be null.</param>
/// <param name="port">TCP port used in the loopback prefix.</param>
/// <param name="logger">Log sink; must not be null.</param>
/// <param name="authToken">Required bearer token; null or empty disables the auth check.</param>
/// <exception cref="ArgumentNullException"><paramref name="bridge"/> or <paramref name="logger"/> is null.</exception>
public McpHttpServer(McpToolBridge bridge, int port, IOpenClawLogger logger, string? authToken = null)
{
    if (bridge is null)
    {
        throw new ArgumentNullException(nameof(bridge));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _bridge = bridge;
    _logger = logger;
    _port = port;

    // An empty token is normalized to null, i.e. "auth disabled".
    if (string.IsNullOrEmpty(authToken))
    {
        _authToken = null;
    }
    else
    {
        _authToken = authToken;
    }

    _listener = new HttpListener();

    // Register only the numeric loopback host: on Windows this lets a
    // non-elevated process start without a separate netsh http urlacl
    // reservation for http://localhost:port/.
    var prefix = $"http://127.0.0.1:{port}/";
    _listener.Prefixes.Add(prefix);
}
|
||||
|
||||
/// <summary>
/// Starts the listener and the background accept loop. Does nothing when
/// the listener is already listening.
/// </summary>
public void Start()
{
    if (!_listener.IsListening)
    {
        _listener.Start();
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.Info($"[MCP] HTTP server listening on {Endpoint}");
    }
}
|
||||
|
||||
/// <summary>
/// Replaces the bearer token used for subsequent requests. A null or empty
/// value is stored as null, which disables the auth check.
/// </summary>
public void UpdateAuthToken(string? authToken)
{
    string? normalized = null;
    if (!string.IsNullOrEmpty(authToken))
    {
        normalized = authToken;
    }

    // Volatile write pairs with the volatile read taken per request.
    Volatile.Write(ref _authToken, normalized);
}
|
||||
|
||||
/// <summary>
/// Accepts incoming contexts until <paramref name="ct"/> is cancelled or the
/// listener stops. Each accepted request is checked for a loopback remote
/// address, gated by the handler semaphore, and then dispatched to a
/// tracked background task.
/// </summary>
/// <param name="ct">Server shutdown token.</param>
private async Task AcceptLoopAsync(CancellationToken ct)
{
    while (!ct.IsCancellationRequested && _listener.IsListening)
    {
        HttpListenerContext ctx;
        try
        {
            ctx = await _listener.GetContextAsync().ConfigureAwait(false);
        }
        catch (HttpListenerException) when (ct.IsCancellationRequested)
        {
            break;
        }
        catch (ObjectDisposedException)
        {
            break;
        }
        catch (Exception ex)
        {
            if (ct.IsCancellationRequested) break;
            _logger.Error("[MCP] Accept failed", ex);
            continue;
        }

        // Defensive: even though the prefix is loopback-only, double-check.
        if (!IPAddress.IsLoopback(ctx.Request.RemoteEndPoint.Address))
        {
            Reject(ctx, HttpStatusCode.Forbidden, "loopback only");
            continue;
        }

        // Cap concurrent handlers — a misbehaving local client can otherwise
        // pin every threadpool thread on long-running screen/camera calls.
        // Wait briefly so transient spikes succeed instead of returning 503,
        // without inviting unbounded queueing.
        bool slotAcquired;
        try
        {
            slotAcquired = await _handlerLimiter.WaitAsync(50, ct).ConfigureAwait(false);
        }
        catch (Exception ex) when (ex is OperationCanceledException or ObjectDisposedException)
        {
            // Shutdown raced with the wait. Without this catch the exception
            // would end the accept loop abnormally and leave the already
            // accepted connection hanging with no response.
            try { ctx.Response.Abort(); } catch { /* connection already gone */ }
            break;
        }

        if (!slotAcquired)
        {
            Reject(ctx, (HttpStatusCode)503, "server busy");
            continue;
        }

        // NOTE: do not pass `ct` to Task.Run. If the token is cancelled
        // between WaitAsync returning and the delegate starting, Task.Run
        // skips the delegate and the finally never runs — leaking a
        // semaphore slot. Let the delegate observe cancellation itself.
        var handlerTask = Task.Run(() => RunHandlerAsync(ctx));
        TrackHandler(handlerTask);
    }
}
|
||||
|
||||
/// <summary>
/// Runs one request under a per-request deadline and always returns the
/// handler slot to the limiter afterwards.
/// </summary>
/// <param name="ctx">The accepted request context.</param>
private async Task RunHandlerAsync(HttpListenerContext ctx)
{
    try
    {
        // Per-request linked CTS: server shutdown OR per-request deadline.
        // Created inside the try so that a failure here (e.g. the server CTS
        // was already disposed during teardown) still reaches the finally
        // and releases the slot acquired by the accept loop.
        using var requestCts = CancellationTokenSource.CreateLinkedTokenSource(_cts.Token);
        requestCts.CancelAfter(RequestTimeoutMs);

        await HandleAsync(ctx, requestCts.Token).ConfigureAwait(false);
    }
    finally
    {
        // Defensive: a handler racing with shutdown may find the limiter
        // already disposed; swallowing keeps that from surfacing as an
        // unobserved task exception.
        try { _handlerLimiter.Release(); }
        catch (ObjectDisposedException) { /* server torn down */ }
        catch (SemaphoreFullException) { /* defensive */ }
    }
}
|
||||
|
||||
/// <summary>
/// Adds <paramref name="task"/> to the set of in-flight handlers and
/// arranges for it to be removed again once it completes.
/// </summary>
private void TrackHandler(Task task)
{
    lock (_activeLock)
    {
        _activeHandlers.Add(task);
    }

    // Runs when the handler finishes, whatever its final state.
    void Untrack(Task finished)
    {
        lock (_activeLock)
        {
            _activeHandlers.Remove(finished);
        }
    }

    _ = task.ContinueWith(
        Untrack,
        CancellationToken.None,
        TaskContinuationOptions.ExecuteSynchronously,
        TaskScheduler.Default);
}
|
||||
|
||||
private async Task HandleAsync(HttpListenerContext ctx, CancellationToken ct)
|
||||
{
|
||||
// Snapshot the auth token once. UpdateAuthToken can rotate _authToken
|
||||
// on another thread, and reading the field separately for the null-test
|
||||
// and the comparison would let a single request observe two different
|
||||
// values (e.g. enter the auth branch with the old token, then compare
|
||||
// against the new one — or vice versa).
|
||||
var authToken = Volatile.Read(ref _authToken);
|
||||
try
|
||||
{
|
||||
// CSRF/browser gate — reject anything carrying a browser Origin.
|
||||
// Real MCP HTTP clients (Claude Desktop, Cursor, Claude Code, curl)
|
||||
// do not set Origin. A browser fetch always does.
|
||||
var origin = ctx.Request.Headers["Origin"];
|
||||
if (!string.IsNullOrEmpty(origin))
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.Forbidden, "origin not allowed");
|
||||
return;
|
||||
}
|
||||
// Belt-and-suspenders: a browser may strip Origin (e.g. via a
|
||||
// privacy extension) but still send Sec-Fetch-Site / Sec-Fetch-Mode
|
||||
// / Referer. Treat any of those as evidence of a browser context.
|
||||
// Native MCP clients don't emit these headers.
|
||||
if (!string.IsNullOrEmpty(ctx.Request.Headers["Sec-Fetch-Site"]) ||
|
||||
!string.IsNullOrEmpty(ctx.Request.Headers["Sec-Fetch-Mode"]) ||
|
||||
!string.IsNullOrEmpty(ctx.Request.Headers["Referer"]))
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.Forbidden, "browser context not allowed");
|
||||
return;
|
||||
}
|
||||
|
||||
// Host header must match our loopback bind. Defends against DNS
|
||||
// rebinding pivots that route a public hostname to 127.0.0.1.
|
||||
if (!IsHostAllowed(ctx.Request.Headers["Host"]))
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.Forbidden, "host not allowed");
|
||||
return;
|
||||
}
|
||||
|
||||
// Bearer-token check. Defends against untrusted local processes
|
||||
// (browser helpers, editor extensions) that share the loopback
|
||||
// surface with the legitimate MCP client. Token lives in a
|
||||
// user-only-readable file under %LOCALAPPDATA%; CLI/agent
|
||||
// registration reads from there. Keep this before method dispatch
|
||||
// so alternate verbs cannot bypass the configured token gate.
|
||||
if (authToken != null && !IsAuthorized(authToken, ctx.Request.Headers["Authorization"]))
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.Unauthorized, "missing or invalid bearer token");
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.Request.HttpMethod == "GET")
|
||||
{
|
||||
// Friendly probe response — useful for confirming the server is up
|
||||
// from a curl/browser without hitting the JSON-RPC endpoint.
|
||||
WriteText(ctx.Response, HttpStatusCode.OK,
|
||||
$"OpenClaw MCP server. POST JSON-RPC to {Endpoint}", "text/plain");
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx.Request.HttpMethod != "POST")
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.MethodNotAllowed, "POST only");
|
||||
return;
|
||||
}
|
||||
|
||||
// Force application/json on POST. Combined with the Origin check,
|
||||
// this means a browser cross-origin fetch must use a non-simple
|
||||
// Content-Type and trigger a CORS preflight, which we don't honor.
|
||||
var contentType = ctx.Request.ContentType ?? "";
|
||||
var semi = contentType.IndexOf(';');
|
||||
var contentTypeBase = (semi >= 0 ? contentType.Substring(0, semi) : contentType).Trim();
|
||||
if (!string.Equals(contentTypeBase, "application/json", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.UnsupportedMediaType, "application/json required");
|
||||
return;
|
||||
}
|
||||
|
||||
// Reject bodies that exceed our cap *before* reading them — a
|
||||
// multi-GB POST would otherwise OOM the tray.
|
||||
if (ctx.Request.ContentLength64 > MaxRequestBodyBytes)
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.RequestEntityTooLarge, "request body too large");
|
||||
return;
|
||||
}
|
||||
|
||||
string body;
|
||||
try
|
||||
{
|
||||
body = await ReadBodyAsync(ctx.Request, MaxRequestBodyBytes, ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (InvalidDataException)
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.RequestEntityTooLarge, "request body too large");
|
||||
return;
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||
{
|
||||
// Slow-body or stuck client — free the slot rather than blocking forever.
|
||||
Reject(ctx, HttpStatusCode.RequestTimeout, "request timed out");
|
||||
return;
|
||||
}
|
||||
|
||||
string? responseBody;
|
||||
try
|
||||
{
|
||||
responseBody = await _bridge.HandleRequestAsync(body, ct).ConfigureAwait(false);
|
||||
}
|
||||
catch (OperationCanceledException) when (ct.IsCancellationRequested)
|
||||
{
|
||||
Reject(ctx, HttpStatusCode.RequestTimeout, "request timed out");
|
||||
return;
|
||||
}
|
||||
|
||||
if (responseBody == null)
|
||||
{
|
||||
// Notification — JSON-RPC says no body. 204 is the most honest signal.
|
||||
ctx.Response.StatusCode = (int)HttpStatusCode.NoContent;
|
||||
ctx.Response.Close();
|
||||
return;
|
||||
}
|
||||
|
||||
WriteText(ctx.Response, HttpStatusCode.OK, responseBody, "application/json");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_logger.Error("[MCP] Request failed", ex);
|
||||
// Response may already be partially written or closed; swallow.
|
||||
try { Reject(ctx, HttpStatusCode.InternalServerError, "internal error"); }
|
||||
catch { /* response already disposed */ }
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Validates an <c>Authorization</c> header against the configured bearer token.
/// </summary>
/// <param name="authToken">The expected token.</param>
/// <param name="authHeader">Raw header value; may be null or empty.</param>
/// <returns>
/// True only when the header is "Bearer &lt;token&gt;" (scheme matched
/// case-insensitively, surrounding whitespace on the token ignored) and the
/// token equals <paramref name="authToken"/> exactly.
/// </returns>
private static bool IsAuthorized(string authToken, string? authHeader)
{
    const string BearerPrefix = "Bearer ";

    if (string.IsNullOrEmpty(authHeader)
        || !authHeader.StartsWith(BearerPrefix, StringComparison.OrdinalIgnoreCase))
    {
        return false;
    }

    var candidate = authHeader.Substring(BearerPrefix.Length).Trim();
    if (candidate.Length != authToken.Length)
    {
        return false;
    }

    // Equal character counts from here on; compare the encoded bytes with
    // the fixed-time primitive rather than string equality.
    byte[] candidateBytes = Encoding.UTF8.GetBytes(candidate);
    byte[] expectedBytes = Encoding.UTF8.GetBytes(authToken);
    return CryptographicOperations.FixedTimeEquals(candidateBytes, expectedBytes);
}
|
||||
|
||||
/// <summary>
/// Decides whether a request's <c>Host</c> header names a loopback address
/// this server accepts: <c>127.0.0.1</c>, <c>localhost</c> (any casing), or
/// the IPv6 loopback <c>::1</c> (bare or bracketed), each optionally followed
/// by a port.
/// </summary>
/// <param name="host">Raw Host header value; may be null or empty.</param>
/// <returns>True when the host part is one of the accepted loopback names.</returns>
private static bool IsHostAllowed(string? host)
{
    if (string.IsNullOrEmpty(host)) return false;
    var trimmed = host.Trim();

    // IPv6 literal form: [::1] or [::1]:port.
    if (trimmed.StartsWith('['))
    {
        var closeBracket = trimmed.IndexOf(']');
        if (closeBracket < 0) return false;
        var v6 = trimmed.Substring(1, closeBracket - 1);
        if (!string.Equals(v6, "::1", StringComparison.Ordinal)) return false;

        // Only an optional ":<digits>" may follow the closing bracket.
        // Previously anything after ']' was ignored, so a value such as
        // "[::1].evil.example" passed the check.
        var suffix = trimmed.Substring(closeBracket + 1);
        if (suffix.Length == 0) return true;
        if (suffix[0] != ':') return false;
        for (var i = 1; i < suffix.Length; i++)
        {
            if (suffix[i] < '0' || suffix[i] > '9') return false;
        }
        return true;
    }

    // Bare IPv6 loopback without a port. This must be tested before the
    // port-stripping step below, which would otherwise cut "::1" at its
    // last colon and leave ":" — so the "::1" comparison could never match
    // the unbracketed address on its own.
    if (string.Equals(trimmed, "::1", StringComparison.Ordinal)) return true;

    // IPv4 / hostname: strip trailing :port if present.
    var colon = trimmed.LastIndexOf(':');
    var hostname = (colon > 0 ? trimmed.Substring(0, colon) : trimmed).Trim();
    return string.Equals(hostname, "127.0.0.1", StringComparison.Ordinal)
        || string.Equals(hostname, "::1", StringComparison.Ordinal)
        || string.Equals(hostname, "localhost", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Reads the request body into a string while enforcing a hard size cap.
/// The declared Content-Length is not consulted here: bytes are counted as
/// they arrive, so the cap holds regardless of what the client declared.
/// </summary>
/// <param name="request">Request whose input stream is drained.</param>
/// <param name="maxBytes">Largest body size accepted, in bytes.</param>
/// <param name="ct">Token passed to every read, so a stalled sender can be abandoned.</param>
/// <returns>The body decoded with the request's content encoding (UTF-8 if that is null).</returns>
/// <exception cref="InvalidDataException">More than <paramref name="maxBytes"/> bytes were received.</exception>
private static async Task<string> ReadBodyAsync(HttpListenerRequest request, long maxBytes, CancellationToken ct)
{
    var textEncoding = request.ContentEncoding ?? Encoding.UTF8;

    // The scratch buffer comes from the shared pool and is always handed
    // back in the finally block, including on the over-cap and cancel paths.
    var pool = System.Buffers.ArrayPool<byte>.Shared;
    byte[] chunk = pool.Rent(8192);
    try
    {
        using var collected = new MemoryStream();
        long received = 0;
        int read;
        while ((read = await request.InputStream.ReadAsync(chunk.AsMemory(0, chunk.Length), ct).ConfigureAwait(false)) > 0)
        {
            received += read;
            if (received > maxBytes)
            {
                throw new InvalidDataException("request body exceeds cap");
            }

            collected.Write(chunk, 0, read);
        }

        return textEncoding.GetString(collected.GetBuffer(), 0, (int)collected.Length);
    }
    finally
    {
        pool.Return(chunk);
    }
}
|
||||
|
||||
/// <summary>
/// Sends a plain-text refusal with the given status code. Any failure while
/// writing is deliberately ignored (best effort).
/// </summary>
/// <param name="ctx">Context whose response is written.</param>
/// <param name="status">HTTP status to report.</param>
/// <param name="reason">Short text placed in the response body.</param>
private static void Reject(HttpListenerContext ctx, HttpStatusCode status, string reason)
{
    try
    {
        WriteText(ctx.Response, status, reason, "text/plain");
    }
    catch
    {
        // response already disposed
    }
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="body"/> as a UTF-8 response with the given status
/// and content type, then closes the output stream.
/// </summary>
/// <param name="response">Response to populate.</param>
/// <param name="status">HTTP status code to send.</param>
/// <param name="body">Text payload.</param>
/// <param name="contentType">Value for the Content-Type header.</param>
private static void WriteText(HttpListenerResponse response, HttpStatusCode status, string body, string contentType)
{
    byte[] payload = Encoding.UTF8.GetBytes(body);

    response.StatusCode = (int)status;
    response.ContentType = contentType;
    response.ContentLength64 = payload.Length;

    // Disposing the output stream at the end of the using block finishes the response.
    using (var stream = response.OutputStream)
    {
        stream.Write(payload, 0, payload.Length);
    }
}
|
||||
|
||||
/// <summary>
/// Begins shutdown: stops accepting requests, cancels in-flight work, and
/// waits for active handlers to finish or for <paramref name="drainTimeout"/>
/// to elapse. Only the first call performs the drain; later calls return an
/// already-completed task.
/// </summary>
/// <param name="drainTimeout">Upper bound on how long to wait for handlers.</param>
/// <returns>A task that completes when the drain attempt has finished.</returns>
public Task StopAsync(TimeSpan drainTimeout)
{
    // The gate is _stopping rather than _disposed, so Dispose (which sets
    // _disposed first) can still claim and run the drain itself.
    var alreadyStopping = Interlocked.Exchange(ref _stopping, 1) != 0;
    return alreadyStopping ? Task.CompletedTask : StopCoreAsync(drainTimeout);
}
|
||||
|
||||
/// <summary>
/// Performs the actual shutdown sequence: cancel the shared token, stop the
/// listener, wait (bounded) for the handlers that were active at that
/// moment, then give the accept loop up to one second to exit.
/// </summary>
/// <param name="drainTimeout">Maximum time to wait for active handlers.</param>
private async Task StopCoreAsync(TimeSpan drainTimeout)
{
    try
    {
        _cts.Cancel();
    }
    catch
    {
        // already cancelled or disposed
    }

    try
    {
        if (_listener.IsListening)
        {
            _listener.Stop();
        }
    }
    catch
    {
        // already stopped
    }

    // Copy the active set under the lock and wait on the copy, so the wait
    // never enumerates a collection that is being modified.
    Task[] pending;
    lock (_activeLock)
    {
        pending = new Task[_activeHandlers.Count];
        _activeHandlers.CopyTo(pending);
    }

    var drained = Task.WhenAll(pending);
    var timeout = Task.Delay(drainTimeout);
    var first = await Task.WhenAny(drained, timeout).ConfigureAwait(false);

    var timedOut = ReferenceEquals(first, timeout);
    if (timedOut && pending.Length > 0)
    {
        int remaining;
        lock (_activeLock)
        {
            remaining = _activeHandlers.Count;
        }

        _logger.Warn($"[MCP] Drain timeout ({drainTimeout.TotalSeconds:F1}s); {remaining} handler(s) still running");
    }

    if (_acceptLoop == null)
    {
        return;
    }

    try
    {
        await Task.WhenAny(_acceptLoop, Task.Delay(TimeSpan.FromSeconds(1))).ConfigureAwait(false);
    }
    catch
    {
        // loop may have errored
    }
}
|
||||
|
||||
/// <summary>
/// Shuts the server down and releases the listener, the cancellation
/// source, and the handler limiter. Only the first call has any effect.
/// Blocks until active handlers have drained or <c>DrainTimeout</c> elapses.
/// </summary>
public void Dispose()
{
    if (Interlocked.Exchange(ref _disposed, 1) != 0) return;

    // Run the drain unconditionally on dispose. We can't go through the
    // public StopAsync because a prior caller may already have set
    // _stopping — we still need to wait for the drain to finish before
    // tearing down the limiter and CTS.
    if (Interlocked.Exchange(ref _stopping, 1) == 0)
    {
        try { StopCoreAsync(DrainTimeout).GetAwaiter().GetResult(); }
        catch (Exception ex) { _logger.Warn($"[MCP] Drain error: {ex.Message}"); }
    }
    else
    {
        // A prior StopAsync is in flight; wait briefly for it to finish so
        // we don't dispose the limiter while a handler is still inside it.
        //
        // Snapshot under the lock, then wait with the lock released. The
        // earlier code blocked while holding _activeLock, so anything that
        // needs that lock to make progress (presumably the handlers'
        // self-removal on completion — confirm against the handler code)
        // could not finish and the wait always ran to the full timeout.
        Task[] toAwait;
        lock (_activeLock)
        {
            toAwait = new Task[_activeHandlers.Count];
            _activeHandlers.CopyTo(toAwait);
        }

        if (toAwait.Length > 0)
        {
            try { Task.WhenAny(Task.WhenAll(toAwait), Task.Delay(DrainTimeout)).GetAwaiter().GetResult(); }
            catch { /* swallow — best-effort */ }
        }
    }

    try { _listener.Close(); } catch { /* already closed */ }
    _cts.Dispose();
    _handlerLimiter.Dispose();
}
|
||||
}
|
||||
436
src/OpenClaw.Shared/Mcp/McpToolBridge.cs
Normal file
436
src/OpenClaw.Shared/Mcp/McpToolBridge.cs
Normal file
@ -0,0 +1,436 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
|
||||
/// Transport-agnostic MCP server core. Auto-discovers tools from the live
|
||||
/// <see cref="INodeCapability"/> registry — registering a new capability on
|
||||
/// the node client immediately exposes its commands as MCP tools.
|
||||
/// </summary>
|
||||
public class McpToolBridge
|
||||
{
|
||||
/// <summary>MCP protocol revision reported in the <c>initialize</c> result.</summary>
private const string ProtocolVersion = "2024-11-05";

/// <summary>Supplies the current capability list each time tools are listed or called.</summary>
private readonly Func<IReadOnlyList<INodeCapability>> _capabilityProvider;

/// <summary>Log sink for debug, warning, and error messages.</summary>
private readonly IOpenClawLogger _logger;

/// <summary>Server name reported in <c>serverInfo</c>.</summary>
private readonly string _serverName;

/// <summary>Server version reported in <c>serverInfo</c>.</summary>
private readonly string _serverVersion;

/// <summary>Serializer settings for results and tool payloads (compact, non-indented JSON).</summary>
private static readonly JsonSerializerOptions PayloadJsonOptions = new() { WriteIndented = false };
|
||||
|
||||
/// <summary>
/// Creates a bridge over the given capability source.
/// </summary>
/// <param name="capabilityProvider">Returns the capabilities to expose; invoked on each list/call request.</param>
/// <param name="logger">Optional logger; <c>NullLogger.Instance</c> is used when omitted.</param>
/// <param name="serverName">Name reported to clients during <c>initialize</c>.</param>
/// <param name="serverVersion">Version reported to clients during <c>initialize</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="capabilityProvider"/> is null.</exception>
public McpToolBridge(
    Func<IReadOnlyList<INodeCapability>> capabilityProvider,
    IOpenClawLogger? logger = null,
    string serverName = "openclaw-tray-mcp",
    string serverVersion = "0.0.0")
{
    if (capabilityProvider == null)
    {
        throw new ArgumentNullException(nameof(capabilityProvider));
    }

    _capabilityProvider = capabilityProvider;
    _logger = logger ?? NullLogger.Instance;
    _serverName = serverName;
    _serverVersion = serverVersion;
}
|
||||
|
||||
/// <summary>
/// Convenience overload that dispatches a JSON-RPC request body without a
/// deadline (uses <see cref="CancellationToken.None"/>).
/// </summary>
/// <param name="requestBody">Raw JSON-RPC request text.</param>
/// <returns>The response body, or null when the request was a notification.</returns>
public Task<string?> HandleRequestAsync(string requestBody)
{
    return HandleRequestAsync(requestBody, CancellationToken.None);
}
|
||||
|
||||
/// <summary>
/// Dispatch a JSON-RPC request body, observing a cancellation token (used
/// by the HTTP transport to enforce a per-request deadline). When the
/// token fires during a tool dispatch, the call surfaces as a tool error
/// ("request timed out") so the slot is freed even if the underlying
/// capability work continues to run.
/// </summary>
/// <param name="requestBody">Raw JSON-RPC request text.</param>
/// <param name="cancellationToken">Deadline/cancellation signal forwarded to <c>tools/call</c>.</param>
/// <returns>
/// The serialized JSON-RPC response, or null when the request carries no
/// non-null <c>id</c> (a notification) and therefore gets no reply. Parse
/// failures and non-object roots always produce an error response.
/// </returns>
public async Task<string?> HandleRequestAsync(string requestBody, CancellationToken cancellationToken)
{
    JsonDocument doc;
    try
    {
        doc = JsonDocument.Parse(requestBody);
    }
    catch (JsonException ex)
    {
        return WriteError(null, JsonRpcErrorCode.ParseError, $"Parse error: {ex.Message}");
    }

    using (doc)
    {
        var root = doc.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
            return WriteError(null, JsonRpcErrorCode.InvalidRequest, "Request must be a JSON object");

        // A missing or null id marks the request as a notification.
        var idElement = root.TryGetProperty("id", out var idProp) ? idProp : (JsonElement?)null;
        var hasId = idElement.HasValue && idElement.Value.ValueKind != JsonValueKind.Null;

        if (!root.TryGetProperty("method", out var methodProp) || methodProp.ValueKind != JsonValueKind.String)
        {
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.InvalidRequest, "Missing 'method'")
                : null;
        }

        var method = methodProp.GetString()!;
        var paramsElement = root.TryGetProperty("params", out var p) ? p : default;

        try
        {
            object? result = method switch
            {
                "initialize" => HandleInitialize(),
                "ping" => new { },
                "notifications/initialized" => null,
                "tools/list" => HandleToolsList(),
                // ConfigureAwait(false): nothing after this await needs the
                // caller's synchronization context, matching how the HTTP
                // transport awaits this method.
                "tools/call" => await HandleToolsCallAsync(paramsElement, cancellationToken).ConfigureAwait(false),
                // Some clients (notably Cursor) probe these on startup. Returning
                // empty lists is friendlier than MethodNotFound — both feature sets
                // are deferred but compatible by being absent rather than failing.
                "resources/list" => new { resources = Array.Empty<object>() },
                "prompts/list" => new { prompts = Array.Empty<object>() },
                _ => throw new McpMethodNotFoundException(method),
            };

            if (!hasId) return null; // notification — no response
            return WriteResult(idElement, result ?? new { });
        }
        catch (McpMethodNotFoundException ex)
        {
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.MethodNotFound, ex.Message)
                : null;
        }
        catch (McpToolException ex)
        {
            // Tool failures are reported as a result with isError=true,
            // not as a JSON-RPC error object.
            return hasId
                ? WriteToolError(idElement, ex.Message)
                : null;
        }
        catch (Exception ex)
        {
            // Full exception with stack goes to the log; the wire response
            // gets a generic message so we don't leak internals to clients.
            _logger.Error($"[MCP] Handler error for {method}", ex);
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.InternalError, "internal error")
                : null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds the <c>initialize</c> result: protocol version, the advertised
/// capability set (tools only, with <c>listChanged</c> false), and this
/// server's name and version.
/// </summary>
private object HandleInitialize()
{
    var capabilities = new
    {
        tools = new { listChanged = false },
    };
    var serverInfo = new
    {
        name = _serverName,
        version = _serverVersion,
    };

    return new
    {
        protocolVersion = ProtocolVersion,
        capabilities,
        serverInfo,
    };
}
|
||||
|
||||
/// <summary>
/// Builds the <c>tools/list</c> result: one tool entry per command of every
/// capability returned by the provider. Each entry carries the command name,
/// its description from <c>CommandDescriptions</c> (or a generic
/// "{category} capability: {command}" label when none is registered), and
/// an open object input schema.
/// </summary>
private object HandleToolsList()
{
    var tools = new List<object>();

    foreach (var capability in _capabilityProvider())
    {
        foreach (var command in capability.Commands)
        {
            if (!CommandDescriptions.TryGetValue(command, out var description))
            {
                description = $"{capability.Category} capability: {command}";
            }

            tools.Add(new
            {
                name = command,
                description,
                inputSchema = new
                {
                    type = "object",
                    additionalProperties = true,
                    properties = new { },
                },
            });
        }
    }

    return new { tools };
}
|
||||
|
||||
/// <summary>
/// Every command name that has an entry in <see cref="CommandDescriptions"/>.
/// Public so that documentation kept outside this assembly can be checked
/// for drift against the described command set.
/// </summary>
public static IReadOnlyCollection<string> KnownCommands
{
    get { return CommandDescriptions.Keys; }
}
|
||||
|
||||
/// <summary>
|
||||
/// Per-command descriptions advertised via <c>tools/list</c>. Sourced from
|
||||
/// the OpenClaw docs (docs/nodes/index.md, docs/platforms/mac/canvas.md) and
|
||||
/// the capability implementations under <c>OpenClaw.Shared.Capabilities</c>.
|
||||
/// Unknown commands fall back to a generic <c>{category} capability: {cmd}</c>
|
||||
/// label so newly-added capabilities still render before this table is updated.
|
||||
/// </summary>
|
||||
private static readonly Dictionary<string, string> CommandDescriptions = new(StringComparer.Ordinal)
|
||||
{
|
||||
// system.*
|
||||
["system.notify"] =
|
||||
"Show a Windows toast notification on the node. Args: title (string, default 'OpenClaw'), body (string), subtitle (string), sound (bool, default true). Returns { sent: true }.",
|
||||
["system.run"] =
|
||||
"Execute a shell command on the Windows node host. Args: command (string or string[] argv, required), args (string[]), shell (string), cwd (string), timeoutMs (int, default 30000), env (object). Subject to the local exec approval policy. Returns { stdout, stderr, exitCode, timedOut, durationMs }.",
|
||||
["system.run.prepare"] =
|
||||
"Pre-flight a system.run invocation: returns the parsed execution plan (argv, cwd, rawCommand, agentId, sessionKey) without running anything. The gateway uses this to build its approval context before the actual run.",
|
||||
["system.which"] =
|
||||
"Resolve executable names to absolute paths by searching PATH (PATHEXT-aware on Windows). Args: bins (string[], required). Returns { bins: { name: resolvedPath, ... } } including only names that were found.",
|
||||
["system.execApprovals.get"] =
|
||||
"Return the current exec approval policy: { enabled, defaultAction ('allow'|'deny'|'prompt'), rules: [{ pattern, action, shells, description, enabled }, ...] }.",
|
||||
["system.execApprovals.set"] =
|
||||
"Replace the exec approval policy. Args: rules (array of { pattern, action, shells?, description?, enabled? }), defaultAction (string, optional). Persisted to disk; used by future system.run calls.",
|
||||
|
||||
// canvas.* — agent-controlled WebView2 panel for HTML/CSS/JS, A2UI, and small interactive UI surfaces.
|
||||
["canvas.present"] =
|
||||
"Show the agent-controlled Canvas window (WebView2). Args: url (string) or html (string), width (int, default 800), height (int, default 600), x/y (int, -1 = center), title (string, default 'Canvas'), alwaysOnTop (bool, default false). The Canvas is a lightweight visual workspace for HTML/CSS/JS, A2UI, and small interactive UI surfaces.",
|
||||
["canvas.hide"] =
|
||||
"Hide the Canvas window without destroying its state.",
|
||||
["canvas.navigate"] =
|
||||
"Navigate the existing Canvas to a new location. Args: url (string, required) — accepts http(s), file://, or local canvas paths.",
|
||||
["canvas.eval"] =
|
||||
"Evaluate a JavaScript expression inside the Canvas WebView and return its result. Args: script | javaScript | javascript (string, required).",
|
||||
["canvas.snapshot"] =
|
||||
"Capture the Canvas viewport as a base64-encoded image. Args: format ('png'|'jpeg', default 'png'), maxWidth (int, default 1200), quality (int 1-100, default 80). Returns { format, base64 }.",
|
||||
["canvas.a2ui.push"] =
|
||||
"Push A2UI v0.8 server→client messages to the Canvas as JSONL. Supported message kinds: beginRendering, surfaceUpdate, dataModelUpdate, deleteSurface (createSurface / v0.9 is rejected). Args: jsonl (string) or jsonlPath (string, must live under the system temp directory), props (object, optional).",
|
||||
["canvas.a2ui.reset"] =
|
||||
"Reset the Canvas A2UI state, clearing any rendered surfaces.",
|
||||
["canvas.a2ui.dump"] =
|
||||
"READ-ALL: Return the full state of every currently-rendered A2UI surface — the component tree, every data-model entry, and any registered secret paths (values redacted). Operators granting MCP access should treat this as equivalent to a screenshot of every open surface, not a normal observability tool.",
|
||||
["canvas.caps"] =
|
||||
"Report the A2UI feature flags this canvas runtime supports (component catalog, max surfaces, render depth, value-size caps). Diagnostic; no side effects.",
|
||||
["canvas.a2ui.pushJSONL"] =
|
||||
"Streaming variant of canvas.a2ui.push for very large surfaces. Same protocol contract; jsonlPath argument must live under the system temp directory and is opened via FileStream + GetFinalPathNameByHandle to defeat reparse-point traversal.",
|
||||
|
||||
// screen.* — names match the canonical OpenClaw protocol
|
||||
// (apps/shared/OpenClawKit/Sources/OpenClawKit/ScreenCommands.swift).
|
||||
// No screen.list or screen.capture exist in the protocol; previous
|
||||
// drift advertised tools that didn't actually resolve.
|
||||
["screen.snapshot"] =
|
||||
"Capture a screenshot of the specified display. Args: format ('png'|'jpeg', default 'png'), maxWidth (int, default 1920), quality (int 1-100, default 80), monitor / screenIndex (int, default 0 = primary), includePointer (bool, default true). Returns { format, width, height, base64, image } where image is a data: URL.",
|
||||
["screen.record"] =
|
||||
"Record the specified display for a bounded duration. Args: durationMs (int, required, max 300000), format ('mp4'|'webm', default 'mp4'), monitor / screenIndex (int, default 0 = primary), maxWidth (int, default 1920), fps (int, default 30). Returns { format, durationMs, base64 }.",
|
||||
|
||||
// camera.*
|
||||
["camera.list"] =
|
||||
"List cameras attached to the Windows node. Returns { cameras: [{ deviceId, name, isDefault }, ...] }.",
|
||||
["camera.snap"] =
|
||||
"Capture a still photo from a camera. Args: deviceId (string, optional — defaults to system default camera), format ('jpeg'|'png', default 'jpeg'), maxWidth (int, default 1280), quality (int 1-100, default 80). Returns { format, width, height, base64 }.",
|
||||
["camera.clip"] =
|
||||
"Record a short clip from a camera. Args: deviceId (string, optional), durationMs (int, required, max 60000), format ('mp4'|'webm', default 'mp4'), maxWidth (int, default 1280). Returns { format, durationMs, base64 }.",
|
||||
|
||||
// stt.* — microphone capture → text. Default-off; privacy-sensitive.
|
||||
// Single engine: Whisper.net runs locally on the device.
|
||||
["stt.transcribe"] =
|
||||
"Capture microphone audio for a bounded duration and return the transcribed text. Args: maxDurationMs (int, required, > 0, max 30000), language (string, optional BCP-47 tag like 'en-US' or 'auto' — falls back to the configured SttLanguage setting). Returns { transcribed, text, durationMs, language, engineEffective ('whisper') }. Whisper model is downloaded on first use; until then this returns an error pointing to Voice Settings. Requires NodeSttEnabled.",
|
||||
["stt.listen"] =
|
||||
"Capture microphone audio with voice-activity detection and return when the user stops speaking, or after timeoutMs. Args: timeoutMs (int, optional, default 30000, range 1000..120000), language (string, optional BCP-47 tag or 'auto', default 'auto'). Returns { text, language, durationMs, segments[{ text, startMs, endMs }], engineEffective ('whisper') }. Result is the full silence-bounded utterance (all Whisper segments concatenated), not a partial first segment. Requires NodeSttEnabled.",
|
||||
["stt.status"] =
|
||||
"Report STT engine readiness. No args. Returns { engine ('whisper'), readiness ('ready'|'initializing'|'model-downloading'|'model-not-downloaded'|'unavailable'), modelDownloadProgress (0..1 or null), isListenWithVadSupported (bool), isBoundedTranscribeSupported (bool) }. Carries no PII (no transcript history, no language history, no device IDs, no model paths).",
|
||||
|
||||
// tts.*
|
||||
["tts.speak"] =
|
||||
"Speak text aloud on the Windows node. Args: text (string, required), provider ('piper'|'windows'|'elevenlabs', optional — falls back to the configured TtsProvider setting, default 'piper' for fresh installs), voiceId (string, optional — overrides the per-provider configured voice), model (string, optional, ElevenLabs only), interrupt (bool, default false — interrupts any in-progress playback). Returns { spoken, provider, contentType, durationMs }.",
|
||||
|
||||
// app.*
|
||||
["app.navigate"] =
|
||||
"Navigate the companion app to a specific page (e.g., 'home', 'sessions', 'settings'). Args: page (string, required). Returns { navigated, page }.",
|
||||
["app.status"] =
|
||||
"Get current connection status, node state, and gateway info. Returns { connectionStatus, nodeConnected, nodePaired, nodePendingApproval, gatewayVersion, sessionCount, nodeCount }.",
|
||||
["app.sessions"] =
|
||||
"List active sessions with optional agent filter. Args: agentId (string, optional). Returns array of { Key, Status, Model, AgeText, tokens }.",
|
||||
["app.agents"] =
|
||||
"List agents from the connected gateway. Returns the raw agents JSON array.",
|
||||
["app.nodes"] =
|
||||
"List connected nodes and their capabilities. Returns array of { DisplayName, NodeId, IsOnline, Platform, CapabilityCount }.",
|
||||
["app.config.get"] =
|
||||
"Read gateway configuration value at a dot-path. Args: path (string, optional). Returns the config subtree or full config.",
|
||||
["app.settings.get"] =
|
||||
"Read a local app setting by name. Args: name (string, required). Returns the setting value.",
|
||||
["app.settings.set"] =
|
||||
"Set a local app setting (name and value). Args: name (string, required), value (string, required). Returns { name, value }.",
|
||||
["app.menu"] =
|
||||
"Get tray menu state (status, session count, node count). Returns array of menu items.",
|
||||
["app.search"] =
|
||||
"Search the command palette and return matching commands. Args: query (string, required). Returns array of { Title, Subtitle, Icon }.",
|
||||
};
|
||||
|
||||
/// <summary>
/// Handles <c>tools/call</c>: validates the parameters, finds the first
/// capability that can handle the named tool, executes it, and wraps the
/// payload as a single text content item.
/// </summary>
/// <param name="parameters">The request's <c>params</c> element; must be an object with a string <c>name</c> and an optional object <c>arguments</c>.</param>
/// <param name="cancellationToken">Deadline/cancellation signal passed to the capability and used to abandon the wait.</param>
/// <returns>An object with <c>content</c> (one text item holding the payload as JSON) and <c>isError = false</c>.</returns>
/// <exception cref="McpToolException">
/// Invalid parameters, unknown tool, capability reported failure, or the
/// token fired while waiting ("request timed out").
/// </exception>
private async Task<object> HandleToolsCallAsync(JsonElement parameters, CancellationToken cancellationToken)
{
    if (parameters.ValueKind != JsonValueKind.Object)
        throw new McpToolException("Invalid params: expected object");

    if (!parameters.TryGetProperty("name", out var nameProp) || nameProp.ValueKind != JsonValueKind.String)
        throw new McpToolException("Missing 'name'");

    var name = nameProp.GetString()!;
    if (string.IsNullOrWhiteSpace(name))
        throw new McpToolException("Empty tool name");

    var args = parameters.TryGetProperty("arguments", out var argsProp) ? argsProp : default;
    if (args.ValueKind != JsonValueKind.Undefined
        && args.ValueKind != JsonValueKind.Null
        && args.ValueKind != JsonValueKind.Object)
    {
        throw new McpToolException("'arguments' must be a JSON object if present");
    }

    var caps = _capabilityProvider();
    INodeCapability? capability = null;
    foreach (var c in caps)
    {
        if (!c.CanHandle(name)) continue;
        capability = c;
        break;
    }
    if (capability == null)
        throw new McpToolException($"Unknown tool: {name}");

    var request = new NodeInvokeRequest
    {
        Id = Guid.NewGuid().ToString(),
        Command = name,
        // Detach the arguments from the request's JsonDocument. The caller
        // disposes that document as soon as this method returns or throws,
        // but after a timeout the capability may still be running and
        // reading Args; an un-cloned element would then point at a disposed
        // document. A default (Undefined) element has no backing document
        // and cannot be cloned, so it is passed through unchanged.
        Args = args.ValueKind == JsonValueKind.Undefined ? args : args.Clone(),
    };

    _logger.Debug($"[MCP] tools/call {name}");
    // Pass the cancellation token through. Capabilities that override the
    // CT-aware overload (long-running screen/camera capture) will stop
    // their underlying pipeline on timeout; legacy capabilities fall back
    // to the no-CT signature and still benefit from WaitAsync freeing the
    // bridge's handler slot.
    NodeInvokeResponse response;
    try
    {
        response = await capability.ExecuteAsync(request, cancellationToken)
            .WaitAsync(cancellationToken)
            .ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        _logger.Warn($"[MCP] tools/call {name} timed out");
        throw new McpToolException("request timed out");
    }

    if (!response.Ok)
        throw new McpToolException(response.Error ?? "tool execution failed");

    // The payload travels as JSON text inside a text content item.
    var payloadJson = response.Payload is null
        ? "null"
        : JsonSerializer.Serialize(response.Payload, PayloadJsonOptions);

    return new
    {
        content = new[]
        {
            new { type = "text", text = payloadJson },
        },
        isError = false,
    };
}
|
||||
|
||||
/// <summary>
/// Serializes a successful JSON-RPC 2.0 response: <c>jsonrpc</c>, the
/// echoed <c>id</c>, and <paramref name="result"/> under <c>result</c>.
/// </summary>
/// <param name="id">Request id to echo; written as null when absent.</param>
/// <param name="result">Value serialized with <c>PayloadJsonOptions</c>.</param>
/// <returns>The response as a JSON string.</returns>
private static string WriteResult(JsonElement? id, object result)
{
    using var buffer = new MemoryStream();

    // The writer must be disposed (flushed) before the buffer is read back.
    using (var writer = new Utf8JsonWriter(buffer))
    {
        writer.WriteStartObject();
        writer.WriteString("jsonrpc", "2.0");
        WriteId(writer, id);
        writer.WritePropertyName("result");
        JsonSerializer.Serialize(writer, result, PayloadJsonOptions);
        writer.WriteEndObject();
    }

    var length = (int)buffer.Length;
    return System.Text.Encoding.UTF8.GetString(buffer.GetBuffer(), 0, length);
}
|
||||
|
||||
/// <summary>
/// Serializes a JSON-RPC 2.0 error response: <c>jsonrpc</c>, the echoed
/// <c>id</c>, and an <c>error</c> object holding <c>code</c> and <c>message</c>.
/// </summary>
/// <param name="id">Request id to echo; written as null when absent.</param>
/// <param name="code">JSON-RPC error code.</param>
/// <param name="message">Human-readable error text.</param>
/// <returns>The response as a JSON string.</returns>
private static string WriteError(JsonElement? id, int code, string message)
{
    using var buffer = new MemoryStream();

    // The writer must be disposed (flushed) before the buffer is read back.
    using (var writer = new Utf8JsonWriter(buffer))
    {
        writer.WriteStartObject();
        writer.WriteString("jsonrpc", "2.0");
        WriteId(writer, id);

        writer.WritePropertyName("error");
        writer.WriteStartObject();
        writer.WriteNumber("code", code);
        writer.WriteString("message", message);
        writer.WriteEndObject();

        writer.WriteEndObject();
    }

    var length = (int)buffer.Length;
    return System.Text.Encoding.UTF8.GetString(buffer.GetBuffer(), 0, length);
}
|
||||
|
||||
/// <summary>
/// Serializes a failed tool call. The failure is carried inside a normal
/// JSON-RPC result whose <c>isError</c> flag is true and whose single text
/// content item holds the message; no JSON-RPC <c>error</c> object is used.
/// </summary>
/// <param name="id">Request id to echo.</param>
/// <param name="message">Failure text shown to the client.</param>
/// <returns>The response as a JSON string.</returns>
private static string WriteToolError(JsonElement? id, string message)
{
    var textItem = new { type = "text", text = message };
    return WriteResult(id, new
    {
        content = new[] { textItem },
        isError = true,
    });
}
|
||||
|
||||
/// <summary>
/// Writes the <c>id</c> property of a JSON-RPC response. Numeric ids are
/// copied using their original raw text, string ids are written as
/// strings, and anything else (absent, null, or another JSON kind) is
/// written as null.
/// </summary>
/// <param name="w">Writer positioned inside the response object.</param>
/// <param name="id">The request's id element, if any.</param>
private static void WriteId(Utf8JsonWriter w, JsonElement? id)
{
    w.WritePropertyName("id");

    switch (id)
    {
        case { ValueKind: JsonValueKind.Number } number:
            // Raw text keeps fractional or very large ids exactly as sent;
            // converting through a fixed-width integer could throw and lose
            // the id needed to correlate the response.
            w.WriteRawValue(number.GetRawText(), skipInputValidation: true);
            break;

        case { ValueKind: JsonValueKind.String } text:
            w.WriteStringValue(text.GetString());
            break;

        default:
            w.WriteNullValue();
            break;
    }
}
|
||||
|
||||
/// <summary>JSON-RPC 2.0 error codes used in this bridge's error responses.</summary>
private static class JsonRpcErrorCode
{
    /// <summary>The request body was not valid JSON.</summary>
    public const int ParseError = -32700;

    /// <summary>The JSON was valid but not a well-formed request.</summary>
    public const int InvalidRequest = -32600;

    /// <summary>The requested method is not supported.</summary>
    public const int MethodNotFound = -32601;

    /// <summary>An unexpected failure occurred while handling the request.</summary>
    public const int InternalError = -32603;
}
|
||||
|
||||
/// <summary>
/// Thrown by the dispatcher for an unrecognised JSON-RPC method name; the
/// message is "Method not found: " followed by that name.
/// </summary>
private sealed class McpMethodNotFoundException : Exception
{
    public McpMethodNotFoundException(string method)
        : base($"Method not found: {method}")
    {
    }
}
|
||||
|
||||
/// <summary>
/// Signals a tool-level failure whose message is returned to the client as
/// a tool error result.
/// </summary>
private sealed class McpToolException : Exception
{
    public McpToolException(string message)
        : base(message)
    {
    }
}
|
||||
}
|
||||
48
src/OpenClaw.Shared/MenuSizingHelper.cs
Normal file
48
src/OpenClaw.Shared/MenuSizingHelper.cs
Normal file
@ -0,0 +1,48 @@
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Side-effect-free calculations for fitting a popup menu inside the visible work area.
/// </summary>
public static class MenuSizingHelper
{
    /// <summary>Smallest rasterization-scale difference treated as a real change.</summary>
    private const double ScaleTolerance = 0.001;

    /// <summary>
    /// Converts a physical pixel length to view units (96 units per inch) at the given DPI.
    /// </summary>
    /// <param name="pixels">Length in physical pixels.</param>
    /// <param name="dpi">Display DPI; 0 is treated as 96.</param>
    /// <returns>0 for non-positive input; otherwise the rounded-down result, never less than 1.</returns>
    public static int ConvertPixelsToViewUnits(int pixels, uint dpi)
    {
        if (pixels <= 0)
        {
            return 0;
        }

        uint effectiveDpi = dpi == 0 ? 96u : dpi;
        var viewUnits = (int)Math.Floor(pixels * 96.0 / effectiveDpi);
        return Math.Max(1, viewUnits);
    }

    /// <summary>
    /// Reports whether the DPI or the rasterization scale differs between two readings.
    /// A DPI of 0 counts as 96; a non-finite or non-positive scale counts as 1.0.
    /// </summary>
    /// <returns>True when the DPIs differ, or the scales differ by more than the tolerance.</returns>
    public static bool HasDpiOrScaleChanged(uint previousDpi, double previousRasterizationScale, uint currentDpi, double currentRasterizationScale)
    {
        if (NormalizeDpi(previousDpi) != NormalizeDpi(currentDpi))
        {
            return true;
        }

        var scaleDelta = NormalizeScale(previousRasterizationScale) - NormalizeScale(currentRasterizationScale);
        return Math.Abs(scaleDelta) > ScaleTolerance;
    }

    /// <summary>
    /// Picks a window height that shows the content, respects a minimum, and never
    /// exceeds the work area.
    /// </summary>
    /// <param name="contentHeight">Height the content wants; negatives count as 0.</param>
    /// <param name="workAreaHeight">Available height; when not positive, no upper bound is applied.</param>
    /// <param name="minimumHeight">Preferred minimum; values below 1 count as 1.</param>
    /// <returns>The constrained window height.</returns>
    public static int CalculateWindowHeight(int contentHeight, int workAreaHeight, int minimumHeight = 100)
    {
        var content = Math.Max(contentHeight, 0);
        var floor = Math.Max(minimumHeight, 1);

        if (workAreaHeight <= 0)
        {
            return Math.Max(content, floor);
        }

        // The minimum itself may not exceed the work area.
        var visibleFloor = Math.Min(floor, workAreaHeight);
        var wanted = Math.Max(content, visibleFloor);
        return Math.Min(wanted, workAreaHeight);
    }

    private static uint NormalizeDpi(uint dpi)
    {
        return dpi == 0 ? 96u : dpi;
    }

    private static double NormalizeScale(double scale)
    {
        var usable = double.IsFinite(scale) && scale > 0;
        return usable ? scale : 1.0;
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,7 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
@ -25,6 +26,16 @@ public class NodeInvokeRequest
|
||||
public JsonElement Args { get; set; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Event data exposing a request id, command name, success flag, optional error
/// text, duration, and optional node id.
/// </summary>
public class NodeInvokeCompletedEventArgs : EventArgs
{
    /// <summary>Request identifier; defaults to the empty string.</summary>
    public string RequestId { get; set; } = string.Empty;

    /// <summary>Command name; defaults to the empty string.</summary>
    public string Command { get; set; } = string.Empty;

    /// <summary>Success flag; defaults to <c>false</c>.</summary>
    public bool Ok { get; set; }

    /// <summary>Error text, or <c>null</c> when none was set.</summary>
    public string? Error { get; set; }

    /// <summary>Duration value; defaults to <see cref="TimeSpan.Zero"/>.</summary>
    public TimeSpan Duration { get; set; }

    /// <summary>Node identifier, or <c>null</c> when none was set.</summary>
    public string? NodeId { get; set; }
}
|
||||
|
||||
/// <summary>
|
||||
/// Response to a node.invoke request
|
||||
/// </summary>
|
||||
@ -43,15 +54,25 @@ public interface INodeCapability
|
||||
{
|
||||
/// <summary>The capability category (canvas, camera, screen, system)</summary>
|
||||
string Category { get; }
|
||||
|
||||
|
||||
/// <summary>Commands this capability can handle</summary>
|
||||
IReadOnlyList<string> Commands { get; }
|
||||
|
||||
|
||||
/// <summary>Check if this capability can handle the given command</summary>
|
||||
bool CanHandle(string command);
|
||||
|
||||
|
||||
/// <summary>Execute a command and return the result</summary>
|
||||
Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request);
|
||||
|
||||
/// <summary>
|
||||
/// Execute a command with a cancellation token. The default implementation
|
||||
/// just calls <see cref="ExecuteAsync(NodeInvokeRequest)"/>; capabilities
|
||||
/// with long-running work (screen.record, camera.clip) should override so
|
||||
/// MCP request cancellation propagates into the underlying capture
|
||||
/// pipeline rather than orphaning it.
|
||||
/// </summary>
|
||||
Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request, CancellationToken cancellationToken)
|
||||
=> ExecuteAsync(request);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -61,20 +82,23 @@ public abstract class NodeCapabilityBase : INodeCapability
|
||||
{
|
||||
public abstract string Category { get; }
|
||||
public abstract IReadOnlyList<string> Commands { get; }
|
||||
|
||||
|
||||
protected IOpenClawLogger Logger { get; }
|
||||
|
||||
|
||||
protected NodeCapabilityBase(IOpenClawLogger logger)
|
||||
{
|
||||
Logger = logger;
|
||||
}
|
||||
|
||||
|
||||
public virtual bool CanHandle(string command)
|
||||
{
|
||||
return Commands.Contains(command);
|
||||
}
|
||||
|
||||
|
||||
public abstract Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request);
|
||||
|
||||
public virtual Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request, CancellationToken cancellationToken)
|
||||
=> ExecuteAsync(request);
|
||||
|
||||
protected NodeInvokeResponse Success(object? payload = null)
|
||||
{
|
||||
@ -121,7 +145,8 @@ public abstract class NodeCapabilityBase : INodeCapability
|
||||
return defaultValue;
|
||||
if (args.TryGetProperty(name, out var prop) && prop.ValueKind == JsonValueKind.Number)
|
||||
{
|
||||
return prop.GetInt32();
|
||||
try { return prop.GetInt32(); }
|
||||
catch (FormatException) { return defaultValue; }
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
@ -137,6 +162,32 @@ public abstract class NodeCapabilityBase : INodeCapability
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
/// <summary>
/// Get a string array from a JSON array property. Non-string and whitespace-only elements
/// are ignored. Strings are trimmed to preserve the historical system.which behavior.
/// </summary>
/// <param name="args">Argument element; anything other than a JSON object yields an empty array.</param>
/// <param name="name">Name of the property expected to hold the array.</param>
/// <returns>
/// The trimmed, non-empty string elements in their original order, or an empty array when
/// the property is missing, is not an array, or holds no usable strings.
/// </returns>
protected string[] GetStringArrayArg(JsonElement args, string name)
{
    // TryGetProperty throws InvalidOperationException for any value kind other than
    // Object, so malformed args (array, string, number, ...) are treated as "no values".
    if (args.ValueKind != JsonValueKind.Object)
        return [];

    if (!args.TryGetProperty(name, out var prop) || prop.ValueKind != JsonValueKind.Array)
        return [];

    // Upper bound: every element is kept.
    var buffer = new string[prop.GetArrayLength()];
    var count = 0;
    foreach (var item in prop.EnumerateArray())
    {
        if (item.ValueKind != JsonValueKind.String)
            continue;

        var value = item.GetString()?.Trim();
        if (!string.IsNullOrEmpty(value))
            buffer[count++] = value;
    }

    if (count == 0)
        return [];

    // Skip the copy when nothing was filtered out.
    return count == buffer.Length ? buffer : buffer[..count];
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
@ -11,56 +12,67 @@ namespace OpenClaw.Shared;
|
||||
/// </summary>
|
||||
public class NotificationCategorizer
|
||||
{
|
||||
private static readonly Dictionary<string, (string title, string type)> ChannelMap = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["calendar"] = ("📅 Calendar", "calendar"),
|
||||
["email"] = ("📧 Email", "email"),
|
||||
["ci"] = ("🔨 Build", "build"),
|
||||
["build"] = ("🔨 Build", "build"),
|
||||
["inventory"] = ("📦 Stock Alert", "stock"),
|
||||
["stock"] = ("📦 Stock Alert", "stock"),
|
||||
["health"] = ("🩸 Blood Sugar Alert", "health"),
|
||||
["alerts"] = ("🚨 Urgent Alert", "urgent"),
|
||||
};
|
||||
// FrozenDictionary gives O(1) case-insensitive lookup with no per-call allocation;
|
||||
// these maps are never mutated after startup so FrozenDictionary is the correct choice.
|
||||
private static readonly FrozenDictionary<string, (string title, string type)> ChannelMap =
|
||||
new Dictionary<string, (string title, string type)>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["calendar"] = ("📅 Calendar", "calendar"),
|
||||
["email"] = ("📧 Email", "email"),
|
||||
["ci"] = ("🔨 Build", "build"),
|
||||
["build"] = ("🔨 Build", "build"),
|
||||
["inventory"] = ("📦 Stock Alert", "stock"),
|
||||
["stock"] = ("📦 Stock Alert", "stock"),
|
||||
["health"] = ("🩸 Blood Sugar Alert", "health"),
|
||||
["alerts"] = ("🚨 Urgent Alert", "urgent"),
|
||||
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private static readonly Dictionary<string, (string title, string type)> IntentMap = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["health"] = ("🩸 Blood Sugar Alert", "health"),
|
||||
["urgent"] = ("🚨 Urgent Alert", "urgent"),
|
||||
["alert"] = ("🚨 Urgent Alert", "urgent"),
|
||||
["reminder"] = ("⏰ Reminder", "reminder"),
|
||||
["email"] = ("📧 Email", "email"),
|
||||
["calendar"] = ("📅 Calendar", "calendar"),
|
||||
["build"] = ("🔨 Build", "build"),
|
||||
["stock"] = ("📦 Stock Alert", "stock"),
|
||||
["error"] = ("⚠️ Error", "error"),
|
||||
};
|
||||
private static readonly FrozenDictionary<string, (string title, string type)> IntentMap =
|
||||
new Dictionary<string, (string title, string type)>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["health"] = ("🩸 Blood Sugar Alert", "health"),
|
||||
["urgent"] = ("🚨 Urgent Alert", "urgent"),
|
||||
["alert"] = ("🚨 Urgent Alert", "urgent"),
|
||||
["reminder"] = ("⏰ Reminder", "reminder"),
|
||||
["email"] = ("📧 Email", "email"),
|
||||
["calendar"] = ("📅 Calendar", "calendar"),
|
||||
["build"] = ("🔨 Build", "build"),
|
||||
["stock"] = ("📦 Stock Alert", "stock"),
|
||||
["error"] = ("⚠️ Error", "error"),
|
||||
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private static readonly Dictionary<string, string> CategoryTitles = new(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["health"] = "🩸 Blood Sugar Alert",
|
||||
["urgent"] = "🚨 Urgent Alert",
|
||||
["reminder"] = "⏰ Reminder",
|
||||
["stock"] = "📦 Stock Alert",
|
||||
["email"] = "📧 Email",
|
||||
["calendar"] = "📅 Calendar",
|
||||
["error"] = "⚠️ Error",
|
||||
["build"] = "🔨 Build",
|
||||
["info"] = "🤖 OpenClaw",
|
||||
};
|
||||
private static readonly FrozenDictionary<string, (string title, string type)> CategoryTypeMap =
|
||||
new Dictionary<string, (string title, string type)>(StringComparer.OrdinalIgnoreCase)
|
||||
{
|
||||
["health"] = ("🩸 Blood Sugar Alert", "health"),
|
||||
["urgent"] = ("🚨 Urgent Alert", "urgent"),
|
||||
["reminder"] = ("⏰ Reminder", "reminder"),
|
||||
["stock"] = ("📦 Stock Alert", "stock"),
|
||||
["email"] = ("📧 Email", "email"),
|
||||
["calendar"] = ("📅 Calendar", "calendar"),
|
||||
["error"] = ("⚠️ Error", "error"),
|
||||
["build"] = ("🔨 Build", "build"),
|
||||
["info"] = ("🤖 OpenClaw", "info"),
|
||||
}.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
/// <summary>
|
||||
/// Classify a notification using the layered pipeline.
|
||||
/// When <paramref name="preferStructuredCategories"/> is true (default),
|
||||
/// structured metadata (Intent, Channel) is checked first.
|
||||
/// When false, classification starts from user-defined rules then keyword fallback.
|
||||
/// </summary>
|
||||
public (string title, string type) Classify(OpenClawNotification notification, IReadOnlyList<UserNotificationRule>? userRules = null)
|
||||
public (string title, string type) Classify(OpenClawNotification notification, IReadOnlyList<UserNotificationRule>? userRules = null, bool preferStructuredCategories = true)
|
||||
{
|
||||
// 1. Structured metadata: Intent
|
||||
if (!string.IsNullOrEmpty(notification.Intent) && IntentMap.TryGetValue(notification.Intent, out var intentResult))
|
||||
return intentResult;
|
||||
if (preferStructuredCategories)
|
||||
{
|
||||
// 1. Structured metadata: Intent
|
||||
if (!string.IsNullOrEmpty(notification.Intent) && IntentMap.TryGetValue(notification.Intent, out var intentResult))
|
||||
return intentResult;
|
||||
|
||||
// 2. Structured metadata: Channel
|
||||
if (!string.IsNullOrEmpty(notification.Channel) && ChannelMap.TryGetValue(notification.Channel, out var channelResult))
|
||||
return channelResult;
|
||||
// 2. Structured metadata: Channel
|
||||
if (!string.IsNullOrEmpty(notification.Channel) && ChannelMap.TryGetValue(notification.Channel, out var channelResult))
|
||||
return channelResult;
|
||||
}
|
||||
|
||||
// 3. User-defined rules (pattern match on title + message)
|
||||
if (userRules is { Count: > 0 })
|
||||
@ -71,9 +83,10 @@ public class NotificationCategorizer
|
||||
if (!rule.Enabled) continue;
|
||||
if (MatchesRule(searchText, rule))
|
||||
{
|
||||
if (CategoryTypeMap.TryGetValue(rule.Category, out var categoryResult))
|
||||
return categoryResult;
|
||||
var cat = rule.Category.ToLowerInvariant();
|
||||
var title = CategoryTitles.GetValueOrDefault(cat, "🤖 OpenClaw");
|
||||
return (title, cat);
|
||||
return ("🤖 OpenClaw", cat);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -87,29 +100,38 @@ public class NotificationCategorizer
|
||||
/// </summary>
|
||||
public static (string title, string type) ClassifyByKeywords(string text)
{
    // Nothing to scan: use the generic bucket instead of throwing on null.
    if (string.IsNullOrEmpty(text))
        return ("🤖 OpenClaw", "info");

    // OrdinalIgnoreCase avoids allocating a lowercased copy of `text`.
    bool Has(string keyword) => text.Contains(keyword, StringComparison.OrdinalIgnoreCase);

    // Groups are checked in priority order; the first group with a hit wins.
    if (Has("blood sugar") || Has("glucose") || Has("cgm") || Has("mg/dl"))
        return ("🩸 Blood Sugar Alert", "health");

    if (Has("urgent") || Has("critical") || Has("emergency"))
        return ("🚨 Urgent Alert", "urgent");

    if (Has("reminder"))
        return ("⏰ Reminder", "reminder");

    // "in stock" is already covered by "stock", so it needs no separate check.
    if (Has("stock") || Has("available now"))
        return ("📦 Stock Alert", "stock");

    if (Has("email") || Has("inbox") || Has("gmail"))
        return ("📧 Email", "email");

    if (Has("calendar") || Has("meeting") || Has("event"))
        return ("📅 Calendar", "calendar");

    if (Has("error") || Has("failed") || Has("exception"))
        return ("⚠️ Error", "error");

    // "ci " and "ci/" carry a trailing delimiter so that words merely
    // ending in "ci" do not match.
    if (Has("build") || Has("ci ") || Has("ci/") || Has("deploy"))
        return ("🔨 Build", "build");

    return ("🤖 OpenClaw", "info");
}
|
||||
|
||||
@ -15,6 +15,13 @@
|
||||
<PackageReference Include="NSec.Cryptography" Version="25.4.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- Audio / Speech-to-Text (platform-agnostic components) -->
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Whisper.net" Version="1.9.0" />
|
||||
<PackageReference Include="Whisper.net.Runtime" Version="1.9.0" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.25.1" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,5 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
@ -9,8 +10,20 @@ public class SettingsData
|
||||
{
|
||||
public string? GatewayUrl { get; set; }
|
||||
public string? Token { get; set; }
|
||||
public bool AutoStart { get; set; }
|
||||
public string? BootstrapToken { get; set; }
|
||||
public bool UseSshTunnel { get; set; } = false;
|
||||
public string? SshTunnelUser { get; set; }
|
||||
public string? SshTunnelHost { get; set; }
|
||||
public int SshTunnelRemotePort { get; set; } = 18789;
|
||||
public int SshTunnelLocalPort { get; set; } = 18789;
|
||||
public bool AutoStart { get; set; } = true;
|
||||
public bool GlobalHotkeyEnabled { get; set; } = true;
|
||||
/// <summary>
|
||||
/// One-shot gate: set to true after the post-onboarding "first-run" bootstrap
|
||||
/// kickoff message has been injected into the chat exactly once. Subsequent
|
||||
/// chat-window launches skip injection.
|
||||
/// </summary>
|
||||
public bool HasInjectedFirstRunBootstrap { get; set; }
|
||||
public bool ShowNotifications { get; set; } = true;
|
||||
public string? NotificationSound { get; set; }
|
||||
public bool NotifyHealth { get; set; } = true;
|
||||
@ -22,22 +35,80 @@ public class SettingsData
|
||||
public bool NotifyStock { get; set; } = true;
|
||||
public bool NotifyInfo { get; set; } = true;
|
||||
public bool EnableNodeMode { get; set; } = false;
|
||||
public bool NodeCanvasEnabled { get; set; } = true;
|
||||
public bool NodeScreenEnabled { get; set; } = true;
|
||||
public bool NodeCameraEnabled { get; set; } = true;
|
||||
public bool ScreenRecordingConsentGiven { get; set; } = false;
|
||||
public bool CameraRecordingConsentGiven { get; set; } = false;
|
||||
public bool NodeLocationEnabled { get; set; } = true;
|
||||
public bool NodeBrowserProxyEnabled { get; set; } = true;
|
||||
public bool NodeSttEnabled { get; set; } = false;
|
||||
/// <summary>STT language: "auto" for Whisper auto-detect, or a BCP-47 tag like "en-US".</summary>
|
||||
public string SttLanguage { get; set; } = "auto";
|
||||
/// <summary>Whisper model name: "tiny", "base", or "small".</summary>
|
||||
public string SttModelName { get; set; } = "base";
|
||||
/// <summary>Seconds of silence before auto-submit in voice chat mode.</summary>
|
||||
public float SttSilenceTimeout { get; set; } = 2.5f;
|
||||
/// <summary>Enable TTS playback of responses during voice sessions.</summary>
|
||||
public bool VoiceTtsEnabled { get; set; } = true;
|
||||
/// <summary>Play audio feedback chimes on listen start/stop.</summary>
|
||||
public bool VoiceAudioFeedback { get; set; } = true;
|
||||
public bool NodeTtsEnabled { get; set; } = false;
|
||||
public string TtsProvider { get; set; } = OpenClaw.Shared.Capabilities.TtsCapability.PiperProvider;
|
||||
/// <summary>Persisted: whether the Hub's NavigationView pane is expanded
|
||||
/// (true) or collapsed/compact (false). Default true.</summary>
|
||||
public bool HubNavPaneOpen { get; set; } = true;
|
||||
/// <summary>Optional Windows TTS voice id (or display name). Empty = system default.</summary>
|
||||
public string? TtsWindowsVoiceId { get; set; }
|
||||
/// <summary>
|
||||
/// ElevenLabs API key storage slot. When persisted by the Windows tray's
|
||||
/// SettingsManager this is an opaque dpapi:-prefixed blob, not plaintext.
|
||||
/// </summary>
|
||||
public string? TtsElevenLabsApiKey { get; set; }
|
||||
public string? TtsElevenLabsModel { get; set; }
|
||||
public string? TtsElevenLabsVoiceId { get; set; }
|
||||
/// <summary>Piper voice identifier, e.g. "en_US-amy-low". Voice file is downloaded on first use.</summary>
|
||||
public string TtsPiperVoiceId { get; set; } = "en_US-amy-low";
|
||||
/// <summary>Run the local MCP HTTP server. Independent of EnableNodeMode.</summary>
|
||||
public bool EnableMcpServer { get; set; } = false;
|
||||
/// <summary>
|
||||
/// Hostnames the A2UI image renderer is allowed to fetch over HTTPS.
|
||||
/// Empty by default — agents can still ship inline data: images. Add hosts
|
||||
/// (e.g., "cdn.example.com") via the Settings window.
|
||||
/// </summary>
|
||||
public List<string>? A2UIImageHosts { get; set; }
|
||||
/// <summary>
|
||||
/// Legacy flag (replaced by EnableMcpServer + the EnableNodeMode pair).
|
||||
/// Kept for one-time migration on Load; not written on Save.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public bool? McpOnlyMode { get; set; }
|
||||
public string? PreferredGatewayId { get; set; }
|
||||
public bool HasSeenActivityStreamTip { get; set; } = false;
|
||||
public string? SkippedUpdateTag { get; set; }
|
||||
public bool NotifyChatResponses { get; set; } = true;
|
||||
public bool PreferStructuredCategories { get; set; } = true;
|
||||
public List<UserNotificationRule>? UserRules { get; set; }
|
||||
|
||||
private static readonly JsonSerializerOptions s_options = new() { WriteIndented = true };
|
||||
// ── (Voice / STT settings consolidated into the block above.) ──
|
||||
|
||||
private static readonly JsonSerializerOptions s_options = new()
|
||||
{
|
||||
WriteIndented = true,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
|
||||
public string ToJson() => JsonSerializer.Serialize(this, s_options);
|
||||
|
||||
public static SettingsData? FromJson(string json)
|
||||
public static SettingsData? FromJson(string? json)
|
||||
{
|
||||
if (string.IsNullOrEmpty(json))
|
||||
return null;
|
||||
try
|
||||
{
|
||||
return JsonSerializer.Deserialize<SettingsData>(json);
|
||||
}
|
||||
catch
|
||||
catch (JsonException)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
using System.Buffers;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
|
||||
@ -7,18 +9,19 @@ namespace OpenClaw.Shared;
|
||||
/// </summary>
|
||||
internal static class ShellQuoting
|
||||
{
|
||||
// SearchValues<char> builds an optimized SIMD lookup structure once at startup,
|
||||
// allowing IndexOfAny to scan multiple characters simultaneously (SSE2/AVX2 on x64).
|
||||
// Equivalent set to the former IsShellMetachar switch — 25 shell metacharacters.
|
||||
private static readonly SearchValues<char> s_shellMetachars =
|
||||
SearchValues.Create(" \t\"'&|;<>()^%!$`*?[]{}~\n\r");
|
||||
|
||||
/// <summary>
/// Returns true when the argument contains characters that require quoting
/// to prevent shell splitting or metacharacter interpretation.
/// </summary>
/// <param name="arg">Argument text to inspect.</param>
/// <returns>
/// <c>true</c> when at least one character of <paramref name="arg"/> is in the
/// shell metacharacter set; otherwise <c>false</c>.
/// </returns>
internal static bool NeedsQuoting(string arg)
{
    // One scan against the shared metacharacter set replaces the former
    // per-character loop, which returned before this scan could ever run.
    return arg.AsSpan().IndexOfAny(s_shellMetachars) >= 0;
}
|
||||
|
||||
/// <summary>
|
||||
@ -56,7 +59,11 @@ internal static class ShellQuoting
|
||||
/// </summary>
|
||||
internal static string FormatExecCommand(string[] argv)
|
||||
{
|
||||
return string.Join(" ", argv.Select(FormatSingleArg));
|
||||
if (argv.Length == 0) return string.Empty;
|
||||
var parts = new string[argv.Length];
|
||||
for (var i = 0; i < argv.Length; i++)
|
||||
parts[i] = FormatSingleArg(argv[i]);
|
||||
return string.Join(" ", parts);
|
||||
|
||||
static string FormatSingleArg(string arg)
|
||||
{
|
||||
@ -66,14 +73,4 @@ internal static class ShellQuoting
|
||||
}
|
||||
}
|
||||
|
||||
private static bool IsShellMetachar(char c) => c switch
|
||||
{
|
||||
' ' or '\t' or '"' or '\'' or
|
||||
'&' or '|' or ';' or '<' or '>' or
|
||||
'(' or ')' or '^' or '%' or '!' or
|
||||
'$' or '`' or '*' or '?' or '[' or
|
||||
']' or '{' or '}' or '~' or
|
||||
'\n' or '\r' => true,
|
||||
_ => false
|
||||
};
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user