Compare commits
1 Commits
master
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68800614a3 |
102
.github/workflows/ci.yml
vendored
102
.github/workflows/ci.yml
vendored
@ -11,17 +11,17 @@ jobs:
|
||||
test:
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v5
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
@ -39,13 +39,6 @@ jobs:
|
||||
- name: Restore dependencies
|
||||
run: dotnet restore
|
||||
|
||||
# dotnet-coverage replaces coverlet because the integration tests spawn the
|
||||
# tray exe out-of-process; coverlet only instruments the in-proc test
|
||||
# assembly. Installing once at the job level lets every test step wrap its
|
||||
# `dotnet test` invocation in `dotnet-coverage collect`.
|
||||
- name: Install dotnet-coverage
|
||||
run: dotnet tool install --global dotnet-coverage
|
||||
|
||||
- name: Build Shared Library
|
||||
run: dotnet build src/OpenClaw.Shared -c Debug --no-restore
|
||||
|
||||
@ -55,82 +48,29 @@ jobs:
|
||||
- name: Build Tests
|
||||
run: |
|
||||
dotnet build tests/OpenClaw.Shared.Tests -c Debug --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.Tests -c Debug -r win-x64 --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.IntegrationTests -c Debug -r win-x64 --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.UITests -c Debug -r win-x64 --no-restore
|
||||
dotnet build tests/OpenClaw.Tray.Tests -c Debug --no-restore
|
||||
|
||||
- name: Run Shared Tests
|
||||
env:
|
||||
OPENCLAW_RUN_INTEGRATION: 1
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\Shared\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Shared.Tests
|
||||
dotnet test tests/OpenClaw.Shared.Tests
|
||||
--no-build
|
||||
-c Debug
|
||||
--verbosity normal
|
||||
--collect:"XPlat Code Coverage"
|
||||
--results-directory TestResults\Shared
|
||||
--logger trx;LogFileName=OpenClaw.Shared.Tests.trx"
|
||||
--logger "trx;LogFileName=OpenClaw.Shared.Tests.trx"
|
||||
|
||||
- name: Run Tray Tests
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\Tray\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.Tests
|
||||
dotnet test tests/OpenClaw.Tray.Tests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--collect:"XPlat Code Coverage"
|
||||
--results-directory TestResults\Tray
|
||||
--logger trx;LogFileName=OpenClaw.Tray.Tests.trx"
|
||||
|
||||
# Tray integration tests gate on OPENCLAW_RUN_INTEGRATION; set it so the
|
||||
# MCP-server / capability tests actually run. dotnet-coverage with no
|
||||
# filter captures coverage for both the test host AND the spawned tray
|
||||
# exe (coverlet could not — see tests/Directory.Build.props comment).
|
||||
- name: Run Tray Integration Tests
|
||||
env:
|
||||
OPENCLAW_RUN_INTEGRATION: 1
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\TrayIntegration\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.IntegrationTests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--results-directory TestResults\TrayIntegration
|
||||
--logger trx;LogFileName=OpenClaw.Tray.IntegrationTests.trx"
|
||||
|
||||
# UI tests need a real visual tree AND a system-registered WindowsAppRuntime
|
||||
# framework MSIX — the test fixture calls Bootstrap.Initialize(1.8, stable),
|
||||
# which looks up the framework package by identity. The hosted windows-2025
|
||||
# runner image doesn't preinstall it, so we install it explicitly here.
|
||||
# Version pinned to match Microsoft.WindowsAppSDK 1.8.260101001 in the csprojs.
|
||||
- name: Install WindowsAppRuntime 1.8
|
||||
shell: pwsh
|
||||
run: |
|
||||
$url = "https://aka.ms/windowsappsdk/1.8/1.8.260101001/windowsappruntimeinstall-x64.exe"
|
||||
$exe = "$env:RUNNER_TEMP\WindowsAppRuntimeInstall.exe"
|
||||
Invoke-WebRequest -Uri $url -OutFile $exe
|
||||
& $exe --quiet
|
||||
if ($LASTEXITCODE -ne 0) { throw "WindowsAppRuntimeInstall failed with exit code $LASTEXITCODE" }
|
||||
|
||||
- name: Run Tray UI Tests
|
||||
run: >
|
||||
dotnet-coverage collect
|
||||
--output TestResults\TrayUI\coverage.cobertura.xml
|
||||
--output-format cobertura
|
||||
"dotnet test tests/OpenClaw.Tray.UITests
|
||||
--no-build
|
||||
-c Debug
|
||||
-r win-x64
|
||||
--verbosity normal
|
||||
--results-directory TestResults\TrayUI
|
||||
--logger trx;LogFileName=OpenClaw.Tray.UITests.trx"
|
||||
--logger "trx;LogFileName=OpenClaw.Tray.Tests.trx"
|
||||
|
||||
- name: Upload Test Results
|
||||
if: always()
|
||||
@ -152,15 +92,15 @@ jobs:
|
||||
rid: [win-x64, win-arm64]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v5
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
@ -218,10 +158,10 @@ jobs:
|
||||
platform: ARM64
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup .NET 10 for VS MSBuild
|
||||
uses: actions/setup-dotnet@v5
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.100
|
||||
|
||||
@ -234,7 +174,7 @@ jobs:
|
||||
dotnet --version
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
@ -320,15 +260,15 @@ jobs:
|
||||
platform: [x64, arm64]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup .NET 10
|
||||
uses: actions/setup-dotnet@v5
|
||||
uses: actions/setup-dotnet@v4
|
||||
with:
|
||||
dotnet-version: 10.0.x
|
||||
|
||||
- name: Cache NuGet packages
|
||||
uses: actions/cache@v5
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.nuget/packages
|
||||
key: nuget-${{ runner.os }}-${{ hashFiles('**/*.csproj') }}
|
||||
@ -354,7 +294,7 @@ jobs:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download win-x64 tray artifact
|
||||
uses: actions/download-artifact@v8
|
||||
@ -500,7 +440,7 @@ jobs:
|
||||
timestamp-digest: SHA256
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v3
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
generate_release_notes: true
|
||||
files: |
|
||||
|
||||
2
.github/workflows/copilot-setup-steps.yml
vendored
2
.github/workflows/copilot-setup-steps.yml
vendored
@ -21,6 +21,6 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v6
|
||||
- name: Install gh-aw extension
|
||||
uses: github/gh-aw-actions/setup-cli@07c7335cd76c4d4d9f00dd7874f85ff55ed71f24 # v0.71.3
|
||||
uses: github/gh-aw-actions/setup-cli@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
version: v0.68.1
|
||||
|
||||
72
.github/workflows/repo-assist.lock.yml
generated
vendored
72
.github/workflows/repo-assist.lock.yml
generated
vendored
@ -47,9 +47,9 @@
|
||||
# Custom actions used:
|
||||
# - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
# - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
# - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
# - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
# - github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
# - github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
#
|
||||
# Container images used:
|
||||
# - ghcr.io/github/gh-aw-firewall/agent:0.25.20@sha256:9161f2415a3306a344aca34dd671ee69f122317e0a512e66dc64c94b9c508682
|
||||
@ -131,7 +131,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -155,7 +155,7 @@ jobs:
|
||||
GH_AW_INFO_AWMG_VERSION: ""
|
||||
GH_AW_INFO_FIREWALL_TYPE: "squid"
|
||||
GH_AW_COMPILED_STRICT: "true"
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -165,7 +165,7 @@ jobs:
|
||||
- name: Add eyes reaction for immediate feedback
|
||||
id: react
|
||||
if: github.event_name == 'issues' || github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment' || github.event_name == 'discussion' || github.event_name == 'discussion_comment' || github.event_name == 'pull_request' && github.event.pull_request.head.repo.id == github.repository_id
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_REACTION: "eyes"
|
||||
with:
|
||||
@ -191,7 +191,7 @@ jobs:
|
||||
fetch-depth: 1
|
||||
- name: Check workflow lock file
|
||||
id: check-lock-file
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_WORKFLOW_FILE: "repo-assist.lock.yml"
|
||||
GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}"
|
||||
@ -202,7 +202,7 @@ jobs:
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs');
|
||||
await main();
|
||||
- name: Check compile-agentic version
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_COMPILED_VERSION: "v0.68.3"
|
||||
with:
|
||||
@ -213,7 +213,7 @@ jobs:
|
||||
await main();
|
||||
- name: Compute current body text
|
||||
id: sanitized
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -223,7 +223,7 @@ jobs:
|
||||
- name: Add comment with workflow run link
|
||||
id: add-comment
|
||||
if: github.event_name == 'issues' || github.event_name == 'issue_comment' || github.event_name == 'pull_request_review_comment' || github.event_name == 'discussion' || github.event_name == 'discussion_comment' || github.event_name == 'pull_request' && github.event.pull_request.head.repo.id == github.repository_id
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_WORKFLOW_NAME: "Repo Assist"
|
||||
GH_AW_SAFE_OUTPUT_MESSAGES: "{\"footer\":\"\\u003e Generated by 🌈 {workflow_name}, see [workflow run]({run_url}). [Learn more](https://github.com/githubnext/agentics/blob/main/docs/repo-assist.md).\",\"runStarted\":\"{workflow_name} is processing {event_type}, see [workflow run]({run_url})...\",\"runSuccess\":\"✓ {workflow_name} completed successfully, see [workflow run]({run_url}).\",\"runFailure\":\"✗ {workflow_name} encountered {status}, see [workflow run]({run_url}).\"}"
|
||||
@ -314,7 +314,7 @@ jobs:
|
||||
GH_AW_PROMPT_0b7a82d8a513bd25_EOF
|
||||
} > "$GH_AW_PROMPT"
|
||||
- name: Interpolate variables and render templates
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
@ -328,7 +328,7 @@ jobs:
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs');
|
||||
await main();
|
||||
- name: Substitute placeholders
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_GITHUB_ACTOR: ${{ github.actor }}
|
||||
@ -430,7 +430,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -503,7 +503,7 @@ jobs:
|
||||
id: checkout-pr
|
||||
if: |
|
||||
github.event.pull_request || github.event.issue.pull_request
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
@ -835,7 +835,7 @@ jobs:
|
||||
"customValidation": "requiresOneOf:status,title,body"
|
||||
}
|
||||
}
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -1027,7 +1027,7 @@ jobs:
|
||||
bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID"
|
||||
- name: Redact secrets in logs
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -1053,7 +1053,7 @@ jobs:
|
||||
- name: Ingest agent output
|
||||
id: collect_output
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
|
||||
GH_AW_ALLOWED_DOMAINS: "*.gradle-enterprise.cloud,*.pythonhosted.org,*.vsblob.vsassets.io,adoptium.net,anaconda.org,api.adoptium.net,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.foojay.io,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.nuget.org,api.snapcraft.io,archive.apache.org,archive.ubuntu.com,azure.archive.ubuntu.com,azuresearch-usnc.nuget.org,azuresearch-ussc.nuget.org,binstar.org,bootstrap.pypa.io,builds.dotnet.microsoft.com,bun.sh,cdn.azul.com,cdn.jsdelivr.net,central.sonatype.com,ci.dot.net,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,dc.services.visualstudio.com,deb.nodesource.com,deno.land,develocity.apache.org,dist.nuget.org,dl.google.com,dlcdn.apache.org,dot.net,dotnet.microsoft.com,dotnetcli.blob.core.windows.net,download.eclipse.org,download.java.net,download.oracle.com,downloads.gradle-dn.com,esm.sh,files.pythonhosted.org,ge.spockframework.org,get.pnpm.io,github.com,googleapis.deno.dev,googlechromelabs.github.io,gradle.org,host.docker.internal,index.crates.io,jcenter.bintray.com,jdk.java.net,json-schema.org,json.schemastore.org,jsr.io,keyserver.ubuntu.com,maven-central.storage-download.googleapis.com,maven.apache.org,maven.google.com,maven.oracle.com,maven.pkg.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,nuget.org,nuget.pkg.github.com,nugetregistryv2prod.blob.core.windows.net,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,oneocsp.microsoft.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,pkgs.dev.azure.com,plugins-artifacts.gradle.org,plugins.gradle.org,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registr
y.npmjs.org,registry.yarnpkg.com,repo.anaconda.com,repo.continuum.io,repo.gradle.org,repo.grails.org,repo.maven.apache.org,repo.spring.io,repo.yarnpkg.com,repo1.maven.org,repository.apache.org,s.symcb.com,s.symcd.com,scans-in.gradle.com,security.ubuntu.com,services.gradle.org,sh.rustup.rs,skimdb.npmjs.com,static.crates.io,static.rust-lang.org,storage.googleapis.com,telemetry.enterprise.githubcopilot.com,telemetry.vercel.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com,www.java.com,www.microsoft.com,www.npmjs.com,www.npmjs.org,yarnpkg.com"
|
||||
@ -1068,7 +1068,7 @@ jobs:
|
||||
await main();
|
||||
- name: Parse agent logs for step summary
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
|
||||
with:
|
||||
@ -1080,7 +1080,7 @@ jobs:
|
||||
- name: Parse MCP Gateway logs for step summary
|
||||
if: always()
|
||||
id: parse-mcp-gateway
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -1105,7 +1105,7 @@ jobs:
|
||||
- name: Parse token usage for step summary
|
||||
if: always()
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
@ -1179,7 +1179,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -1200,7 +1200,7 @@ jobs:
|
||||
echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT"
|
||||
- name: Process no-op messages
|
||||
id: noop
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_NOOP_MAX: "1"
|
||||
@ -1219,7 +1219,7 @@ jobs:
|
||||
await main();
|
||||
- name: Log detection run
|
||||
id: detection_runs
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_WORKFLOW_NAME: "Repo Assist"
|
||||
@ -1237,7 +1237,7 @@ jobs:
|
||||
await main();
|
||||
- name: Record missing tool
|
||||
id: missing_tool
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_MISSING_TOOL_CREATE_ISSUE: "true"
|
||||
@ -1253,7 +1253,7 @@ jobs:
|
||||
await main();
|
||||
- name: Record incomplete
|
||||
id: report_incomplete
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true"
|
||||
@ -1270,7 +1270,7 @@ jobs:
|
||||
- name: Handle agent failure
|
||||
id: handle_agent_failure
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_WORKFLOW_NAME: "Repo Assist"
|
||||
@ -1307,7 +1307,7 @@ jobs:
|
||||
await main();
|
||||
- name: Update reaction comment with completion status
|
||||
id: conclusion
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_COMMENT_ID: ${{ needs.activation.outputs.comment_id }}
|
||||
@ -1342,7 +1342,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -1409,7 +1409,7 @@ jobs:
|
||||
ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true
|
||||
- name: Setup threat detection
|
||||
if: always() && steps.detection_guard.outputs.run_detection == 'true'
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
WORKFLOW_NAME: "Repo Assist"
|
||||
WORKFLOW_DESCRIPTION: "A friendly repository assistant that runs 2 times a day to support contributors and maintainers.\nCan also be triggered on-demand via '/repo-assist <instructions>' to perform specific tasks.\n- Labels and triages open issues\n- Comments helpfully on open issues to unblock contributors and onboard newcomers\n- Identifies issues that can be fixed and creates draft pull requests with fixes\n- Improves performance, testing, and code quality via PRs\n- Makes engineering investments: dependency updates, CI improvements, tooling\n- Updates its own PRs when CI fails or merge conflicts arise\n- Nudges stale PRs waiting for author response\n- Takes the repository forward with proactive improvements\n- Maintains a persistent memory of work done and what remains\nAlways polite, constructive, and mindful of the project's goals."
|
||||
@ -1472,7 +1472,7 @@ jobs:
|
||||
- name: Parse and conclude threat detection
|
||||
id: detection_conclusion
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }}
|
||||
GH_AW_DETECTION_CONTINUE_ON_ERROR: "true"
|
||||
@ -1493,13 +1493,13 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
- name: Check team membership for command workflow
|
||||
id: check_membership
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_REQUIRED_ROLES: "admin,maintainer,write"
|
||||
with:
|
||||
@ -1511,7 +1511,7 @@ jobs:
|
||||
await main();
|
||||
- name: Check command position
|
||||
id: check_command_position
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_COMMANDS: "[\"repo-assist\"]"
|
||||
with:
|
||||
@ -1542,7 +1542,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -1574,7 +1574,7 @@ jobs:
|
||||
- name: Push repo-memory changes (default)
|
||||
id: push_repo_memory_default
|
||||
if: always()
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
GITHUB_RUN_ID: ${{ github.run_id }}
|
||||
@ -1637,7 +1637,7 @@ jobs:
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@ba90f2186d7ad780ec640f364005fa24e797b360 # v0.68.3
|
||||
uses: github/gh-aw-actions/setup@239aec45b78c8799417efdd5bc6d8cc036629ec1 # v0.71.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
@ -1695,7 +1695,7 @@ jobs:
|
||||
echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
|
||||
- name: Process Safe Outputs
|
||||
id: process_safe_outputs
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_ALLOWED_DOMAINS: "*.gradle-enterprise.cloud,*.pythonhosted.org,*.vsblob.vsassets.io,adoptium.net,anaconda.org,api.adoptium.net,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.foojay.io,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.nuget.org,api.snapcraft.io,archive.apache.org,archive.ubuntu.com,azure.archive.ubuntu.com,azuresearch-usnc.nuget.org,azuresearch-ussc.nuget.org,binstar.org,bootstrap.pypa.io,builds.dotnet.microsoft.com,bun.sh,cdn.azul.com,cdn.jsdelivr.net,central.sonatype.com,ci.dot.net,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,dc.services.visualstudio.com,deb.nodesource.com,deno.land,develocity.apache.org,dist.nuget.org,dl.google.com,dlcdn.apache.org,dot.net,dotnet.microsoft.com,dotnetcli.blob.core.windows.net,download.eclipse.org,download.java.net,download.oracle.com,downloads.gradle-dn.com,esm.sh,files.pythonhosted.org,ge.spockframework.org,get.pnpm.io,github.com,googleapis.deno.dev,googlechromelabs.github.io,gradle.org,host.docker.internal,index.crates.io,jcenter.bintray.com,jdk.java.net,json-schema.org,json.schemastore.org,jsr.io,keyserver.ubuntu.com,maven-central.storage-download.googleapis.com,maven.apache.org,maven.google.com,maven.oracle.com,maven.pkg.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,nuget.org,nuget.pkg.github.com,nugetregistryv2prod.blob.core.windows.net,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,oneocsp.microsoft.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,pkgs.dev.azure.com,plugins-artifacts.gradle.org,plugins.gradle.org,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registr
y.npmjs.org,registry.yarnpkg.com,repo.anaconda.com,repo.continuum.io,repo.gradle.org,repo.grails.org,repo.maven.apache.org,repo.spring.io,repo.yarnpkg.com,repo1.maven.org,repository.apache.org,s.symcb.com,s.symcd.com,scans-in.gradle.com,security.ubuntu.com,services.gradle.org,sh.rustup.rs,skimdb.npmjs.com,static.crates.io,static.rust-lang.org,storage.googleapis.com,telemetry.enterprise.githubcopilot.com,telemetry.vercel.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com,www.java.com,www.microsoft.com,www.npmjs.com,www.npmjs.org,yarnpkg.com"
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@ -346,8 +346,3 @@ FodyWeavers.xsd
|
||||
Output/
|
||||
*.lscache
|
||||
test_ws.py
|
||||
|
||||
# Local visual test output
|
||||
visual-test-output/
|
||||
|
||||
.squad/
|
||||
|
||||
@ -1,31 +0,0 @@
|
||||
# Aaron: actual fixes for PR #274 bugs 2 and 3
|
||||
|
||||
## Bug 2 — tray quick-chat broken
|
||||
|
||||
Traced tray left-click to `InitializeTrayIcon()` -> `_trayIcon.Selected += OnTrayIconSelected` -> `OnTrayIconSelected()` -> `ShowChatWindow()`. The quick-chat path did use `ShowChatWindow`, but it resolved only `settings.Token` while the working operator client resolves `settings.Token`, `settings.BootstrapToken`, then stored `DeviceIdentity.DeviceToken` via `GatewayCredentialResolver`.
|
||||
|
||||
Changes:
|
||||
- `App.ShowChatWindow()` and chat pre-warm now use the same `GatewayCredentialResolver` pattern as the operator client.
|
||||
- `ShowChatWindow()` calls `ChatWindow.RefreshCredentials()` on every tray click, including newly-created windows.
|
||||
- `ChatWindow.RefreshCredentials()` always rebuilds the URL and navigates initialized WebView2 to it; it no longer returns early when the same stale URL is cached.
|
||||
- Added diagnostic logs: `[ChatWindow] Quick-chat credentials resolved from ...` and `[ChatWindow] Refreshing to ...`.
|
||||
- Applied Mattingly's Bug 4 handoff: bootstrap injection now runs from `ChatWindow` after successful WebView navigation.
|
||||
|
||||
Manual validation for Mike: click the tray icon, tail `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log`, and look for `[ChatWindow] Refreshing to ...`; then verify that chat loads without a login loop.
|
||||
|
||||
## Bug 3 — pairing toast notification storm
|
||||
|
||||
Searched toast paths and traced pairing notifications through `WindowsNodeClient` direct `PairingStatusChanged` emitters (`pairing.requested`, `pairing.resolved`, `NOT_PAIRED`, and `hello-ok`) plus tray toasts in `App.OnPairingStatusChanged()` and `App.OnNodeStatusChanged()`.
|
||||
|
||||
Changes:
|
||||
- Routed all `WindowsNodeClient` pairing emitters through `EmitPairingStatusOnTransition()`; duplicates now log `[NODE] Suppressing duplicate pairing status event: ...`.
|
||||
- Added a toast-boundary 30-second dedupe in `App.ShowToast(builder, toastTag, deviceId)`, keyed by `(toastTag, deviceId)`.
|
||||
- Tagged node pairing pending/paired/rejected and node-connected toasts.
|
||||
- Suppressed the node-connected toast if a node-paired toast was just shown for the same device.
|
||||
- Added diagnostic logs: `[ToastDeduper] Showing toast tag=... deviceId=...` and `[ToastDeduper] Suppressed duplicate toast tag=... deviceId=...`.
|
||||
|
||||
Manual validation for Mike: complete pairing; expect exactly one node-paired toast and the log line `[ToastDeduper] Showing toast tag=node-paired deviceId=...`; any duplicates should instead be logged as suppressed.
|
||||
|
||||
## Validation
|
||||
|
||||
Ran `./build.ps1`: passed. Per fast-loop directive, skipped `dotnet test`.
|
||||
@ -1,45 +0,0 @@
|
||||
# Mattingly: actual fixes for PR #274 bugs 1, 4, 5
|
||||
|
||||
## Bug 1 — chat window auto-launch on Finish
|
||||
|
||||
Changed `OnboardingWindow.OnWizardComplete()` to ignore `WizardLifecycleState == "complete"`. The signal now is: the window is completing from `OnboardingRoute.Ready` and `StartupSetupState.RequiresSetup(settings, identityDataPath)` is false. That is the path the Finish button actually takes: `Ready` page Finish -> `OnboardingState.Complete()` -> `OnOnboardingFinished()` -> `OnWizardComplete()`.
|
||||
|
||||
Log to validate: `[OnboardingWindow] OnWizardComplete launching chat`.
|
||||
|
||||
## Bug 4 — BOOTSTRAP.md kickoff injection
|
||||
|
||||
Hardened `BootstrapMessageInjector`:
|
||||
|
||||
- Traverses shadow DOM for Lit UI controls.
|
||||
- Probes and logs visible control count: `[OpenClaw] Bootstrap probe controls=N`.
|
||||
- Supports `textarea`, text inputs, contenteditable, and role=textbox.
|
||||
- Uses native value setters so controlled inputs see the value.
|
||||
- Clicks Send/form-submit/Enter fallbacks.
|
||||
- Does **not** burn `HasInjectedFirstRunBootstrap` when the script returns `no-input`; the gate is only persisted on `sent`.
|
||||
|
||||
Aaron still needs to move the call site to after successful chat navigation, because the current call in `App.ShowChatWindow()` can see `TryGetScriptExecutor()==null` while the WebView2 is still initializing.
|
||||
|
||||
Exact handoff line for Aaron in `ChatWindow.xaml.cs` NavigationCompleted success branch after `RequestChatInputFocus();`:
|
||||
|
||||
```csharp
|
||||
OpenClawTray.Services.BootstrapMessageInjector.ScriptExecutor exec = script => WebView.CoreWebView2.ExecuteScriptAsync(script).AsTask();
|
||||
_ = OpenClawTray.Services.BootstrapMessageInjector.InjectAsync(exec, ((App)Microsoft.UI.Xaml.Application.Current).Settings, initialDelayMs: 500);
|
||||
```
|
||||
|
||||
If `App.Settings` is not exposed, add an internal property returning `_settings`, or route the existing `_settings` from `App.ShowChatWindow()` into a ChatWindow method. The important point is that the call must happen inside `NavigationCompleted` when `e.IsSuccess` is true.
|
||||
|
||||
## Bug 5 — autostart default/toggle
|
||||
|
||||
Changed `ReadyPage` to render the toggle ON as a safety default, then sync to `Settings.AutoStart` on mount and immediately call `AutoStartManager.SetAutoStart()` so a user who never toggles still gets the Run-key. The toggle handler still persists settings and updates the Run-key immediately.
|
||||
|
||||
Changed `AutoStartManager.SetAutoStart()` to use `Registry.CurrentUser.CreateSubKey(...)` instead of `OpenSubKey(...)`, so it can create the Run key/value when missing instead of silently returning.
|
||||
|
||||
Manual registry validation:
|
||||
|
||||
```powershell
|
||||
Get-ItemProperty 'HKCU:\Software\Microsoft\Windows\CurrentVersion\Run' -Name OpenClawTray -ErrorAction SilentlyContinue
|
||||
```
|
||||
|
||||
## Validation
|
||||
|
||||
Ran `./build.ps1`: passed. Per fast-loop directive, skipped `dotnet test`.
|
||||
@ -1,58 +0,0 @@
|
||||
# Mattingly — PR #274 finish should open Hub chat
|
||||
|
||||
## Audit
|
||||
|
||||
Command requested: `grep -rn "launching chat\|ShowChatWindow\|ShowHub\|OnWizardComplete" src/OpenClaw.Tray.WinUI` (run as the equivalent ripgrep search through the Copilot rg tool against the same tree, because `rg` was not on PATH in PowerShell).
|
||||
|
||||
HEAD before this fix: `8c68111 Launch hub chat after onboarding`.
|
||||
|
||||
Matches found:
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:498` — tray icon click calls `ShowChatWindow()`.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:501` — `ShowChatWindow()` method.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:542` — `ShowChatWindow` deferred-show warning string.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:644` — tray menu `openchat` calls `ShowChatWindow()`.
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs:562,581,647,652,654,710,1043,1855,2809,2928,3048,3101,3603,4265` — `ShowHub(...)` method/call sites.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:587` — Finish event calls `OnWizardComplete()`.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:596` — X/Closed path calls `OnWizardComplete()`.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:620` — single `OnWizardComplete()` implementation.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:649` — required diagnostic log line.
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs:650,658,660,667,671,675,679` — deferred Hub chat launch helper.
|
||||
- Documentation/comment-only references in `ChatWindow.xaml.cs`, `HubWindow.xaml.cs`, `VoiceOverlayWindow.xaml.cs`, and `OnboardingState.cs`.
|
||||
|
||||
The literal old string `launching chat` has no remaining source match in this worktree.
|
||||
|
||||
## Diagnosis
|
||||
|
||||
The log Mike captured (`[OnboardingWindow] OnWizardComplete launching chat`) corresponds to the pre-`8c68111` body of `OnboardingWindow.OnWizardComplete` in `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs`, the only wizard-completion implementation. In the current clean worktree, `8c68111` did change that exact method to log `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab` and call `App.ShowHub("chat")`.
|
||||
|
||||
I did not find a second `OnWizardComplete`, overload, post-finish hook, or hidden `ShowHub` fallback to `ChatWindow`. `App.ShowHub(...)` creates a `HubWindow` when `_hubWindow` is null/closed, sets state, navigates, and activates it. The remaining `ShowChatWindow()` calls are tray quick-chat entry points, not wizard finish paths.
|
||||
|
||||
The prior fix therefore did not take effect in the live run because that run was not executing source/binaries containing `8c68111` (or was launched from another stale build/worktree). To make the wizard finish path more robust and easier to verify, this follow-up keeps the exact required log line and dispatches `ShowHub("chat")` at low priority after the wizard close event settles. The Hub therefore opens after the wizard finishes closing and cannot lose an ordering fight to wizard teardown.
|
||||
|
||||
## Changes
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs`
|
||||
- Keeps the required log line: `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`.
|
||||
- Replaces the inline post-finish call with `ShowHubChatAfterWizardClose()`.
|
||||
- The helper dispatches `App.ShowHub("chat")` on the UI dispatcher at low priority, with a direct fallback if enqueue fails.
|
||||
- Adds an explicit warning if `Application.Current` is not the tray `App`.
|
||||
- Updates stale bootstrap comment from `App.ShowChatWindow()` to HubWindow chat navigation.
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Services/OnboardingState.cs`
|
||||
- Updates stale route comment to say the Ready path launches the Hub chat tab, not the old chat window.
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Services/BootstrapMessageInjector.cs`
|
||||
- Updates stale comment to describe HubWindow chat page injection instead of post-wizard `App.ShowChatWindow()`.
|
||||
|
||||
## Validation
|
||||
|
||||
- `git pull --rebase fork feat/wsl-gateway-clean` before commit: already up to date.
|
||||
- `./build.ps1`: passed.
|
||||
- Tests intentionally not run per active directive: NO tests, incremental `./build.ps1` only.
|
||||
|
||||
## Verification log line
|
||||
|
||||
Mike should verify this exact line on the next finish run:
|
||||
|
||||
`[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`
|
||||
@ -1,21 +0,0 @@
|
||||
# Mattingly: Finish opens HubWindow chat
|
||||
|
||||
## Summary
|
||||
Onboarding completion from Ready now launches the full HubWindow directly on the Chat tab instead of the standalone quick-chat ChatWindow.
|
||||
|
||||
## Changes
|
||||
- `src\OpenClaw.Tray.WinUI\App.xaml.cs`
|
||||
- Made `ShowHub(string? navigateTo = null, bool activate = true)` internal so onboarding can reuse the existing hub-opening path.
|
||||
- `src\OpenClaw.Tray.WinUI\Onboarding\OnboardingWindow.cs`
|
||||
- Replaced `ShowChatWindow()` completion launch with `ShowHub("chat")`.
|
||||
- Added diagnostic log: `[OnboardingWindow] OnWizardComplete launching HubWindow on chat tab`.
|
||||
- `src\OpenClaw.Tray.WinUI\Pages\ChatPage.xaml.cs`
|
||||
- Wired `BootstrapMessageInjector.InjectAsync` into the Hub chat WebView2 `NavigationCompleted` success path, matching the standalone `ChatWindow` gated injection behavior.
|
||||
|
||||
## Validation
|
||||
- Ran `./build.ps1` successfully after the code change.
|
||||
- Per active session directive, did not run tests after the fix.
|
||||
|
||||
## Architectural notes
|
||||
- Hub already exposes tag-based navigation through `NavigateTo("chat")`; `ShowHub("chat")` selects the existing NavigationView item and navigates to `ChatPage`.
|
||||
- Bootstrap injection remains wired in both standalone `ChatWindow` and Hub `ChatPage`; the existing global `Settings.HasInjectedFirstRunBootstrap` gate ensures only one path injects.
|
||||
@ -22,8 +22,4 @@ If a command fails:
|
||||
Notes:
|
||||
|
||||
- If a build/test is blocked by an environmental lock (for example running executable locking output assemblies), stop/close the locking process and rerun.
|
||||
- In linked git worktrees, set `OPENCLAW_REPO_ROOT` to the worktree path before running tests that discover the repository root, for example:
|
||||
- `$env:OPENCLAW_REPO_ROOT='D:\github\moltbot-windows-hub.<worktree-name>'`
|
||||
- Tray tests must isolate `SettingsManager` from real user settings. Do not use `new SettingsManager()` in tests unless the test intentionally reads `%APPDATA%\OpenClawTray\settings.json`; pass a temp settings directory or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts.
|
||||
- Prefer isolated worktrees for PR validation. Use `git-wt` for worktree workflows; `wt.exe` may resolve to WorkTrunk instead of Windows Terminal, so use the full Windows Terminal path when explicitly launching Terminal.
|
||||
- Do not claim completion without reporting validation results.
|
||||
|
||||
@ -67,11 +67,13 @@ openclaw-windows-hub/
|
||||
├── .github/workflows/
|
||||
│ └── ci.yml # GitHub Actions CI/CD workflow
|
||||
│
|
||||
├── openclaw-windows-node.slnx # Solution file
|
||||
├── moltbot-windows-hub.slnx # Solution file (historical name)
|
||||
├── README.md # User-facing documentation
|
||||
└── DEVELOPMENT.md # This file
|
||||
```
|
||||
|
||||
> **Note on Naming:** The solution file is named `moltbot-windows-hub.slnx` due to the project's history (the project was formerly known as Moltbot, and before that as Clawdbot). The repository and current branding use "OpenClaw".
|
||||
|
||||
### Project Dependencies
|
||||
|
||||
```
|
||||
@ -87,7 +89,7 @@ OpenClaw.Tray.Tests ──tests──▶ OpenClaw.Shared
|
||||
|-----------|----------|---------|
|
||||
| **Gateway Communication** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | WebSocket client with protocol v3, reconnect/backoff logic |
|
||||
| **Notification System** | `OpenClaw.Tray.WinUI/App.xaml.cs` | Event routing, toast notifications, classification |
|
||||
| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/ChatWindow.xaml.cs` | Embedded chat panel with lifecycle management |
|
||||
| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/WebChatWindow.xaml.cs` | Embedded chat panel with lifecycle management |
|
||||
| **Tray Icon Management** | `OpenClaw.Tray.WinUI/Helpers/IconHelper.cs` | GDI handle management, dynamic icon generation |
|
||||
| **Session Tracking** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | Session state, activity tracking, polling |
|
||||
| **Settings & Logging** | `OpenClaw.Tray.WinUI/Services/` | JSON settings persistence, file rotation logging |
|
||||
@ -166,7 +168,7 @@ dotnet build -p:EnableWindowsTargeting=true
|
||||
|
||||
#### Visual Studio
|
||||
|
||||
1. Open `openclaw-windows-node.slnx` in Visual Studio 2022
|
||||
1. Open `moltbot-windows-hub.slnx` in Visual Studio 2022
|
||||
2. Set `OpenClaw.Tray.WinUI` as the startup project
|
||||
3. Press F5 to run with debugging
|
||||
|
||||
@ -285,7 +287,7 @@ Notifications are classified using two strategies:
|
||||
|
||||
### WebView2 Lifecycle
|
||||
|
||||
The `ChatWindow` uses Microsoft Edge WebView2 for embedded web content:
|
||||
The `WebChatWindow` uses Microsoft Edge WebView2 for embedded web content:
|
||||
|
||||
**Initialization:**
|
||||
1. WebView2 control created in XAML
|
||||
@ -299,7 +301,7 @@ Window Created → WebView2.EnsureCoreWebView2Async() → Navigate to Chat URL
|
||||
```
|
||||
|
||||
**Key Design Decisions:**
|
||||
- **Singleton pattern**: Only one chat window instance exists
|
||||
- **Singleton pattern**: Only one WebChat window instance exists
|
||||
- **Hidden instead of disposed**: Window is hidden when closed to preserve state
|
||||
- **Separate user data folder**: Isolates cookies/storage from browser
|
||||
- **Navigation guard**: Prevents accidental navigation away from chat
|
||||
@ -425,8 +427,8 @@ dotnet test --filter "FullyQualifiedName~AgentActivityTests"
|
||||
```
|
||||
|
||||
**Test Coverage:**
|
||||
- ✅ **1182 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting, MCP, device identity, and WinNode client coverage
|
||||
- ✅ **388 tests** in `OpenClaw.Tray.Tests` — settings round-trip, deep link parsing, onboarding state, setup code decoder, gateway health/chat helpers, security validation, wizard step parsing, gateway discovery, localization validation
|
||||
- ✅ **478 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting
|
||||
- ✅ **93 tests** in `OpenClaw.Tray.Tests` — menu display, menu positioning, settings round-trip, deep link parsing
|
||||
- ✅ All tests are pure unit tests (no network, no file system, no external dependencies)
|
||||
|
||||
See [tests/OpenClaw.Shared.Tests/README.md](tests/OpenClaw.Shared.Tests/README.md) for detailed test documentation.
|
||||
@ -441,7 +443,7 @@ You can test the UI and basic functionality without a running gateway:
|
||||
3. Enter a dummy gateway URL (e.g., `ws://localhost:18789`)
|
||||
4. The app will show "Disconnected" status but you can:
|
||||
- Test the tray menu structure
|
||||
- Open the Settings page and configure preferences
|
||||
- Open the Settings dialog and configure preferences
|
||||
- Test auto-start functionality
|
||||
- View logs
|
||||
|
||||
@ -487,8 +489,8 @@ You can test the UI and basic functionality without a running gateway:
|
||||
- Verify Windows toast notification appears (if enabled)
|
||||
- Click toast → should open relevant UI
|
||||
|
||||
2. **Activity / notification history**:
|
||||
- Right-click tray → **Activity Stream** or **Notification History**
|
||||
2. **Notification History**:
|
||||
- Right-click tray → **Notification History**
|
||||
- Verify past notifications are listed
|
||||
- Test filtering by category
|
||||
|
||||
@ -747,51 +749,6 @@ gh run download <run-id> --repo shanselman/openclaw-windows-hub
|
||||
- **Discussions**: [GitHub Discussions](https://github.com/shanselman/openclaw-windows-hub/discussions)
|
||||
- **Documentation**: [OpenClaw Docs](https://docs.molt.bot)
|
||||
|
||||
## Developing & Testing the Onboarding Wizard
|
||||
|
||||
The onboarding wizard is a 6-screen flow built with OpenClaw's minimal FunctionalUI helper layer for declarative C# WinUI. The chat page uses a WebView2 overlay for visual consistency with the post-setup chat experience.
|
||||
|
||||
### Building
|
||||
|
||||
The WinUI project requires platform-specific build targets. Use the build script:
|
||||
|
||||
```bash
|
||||
./build.ps1 -Project WinUI # Builds with correct -r win-x64 targets
|
||||
```
|
||||
|
||||
Direct `dotnet build` without the script will fail with "WindowsAppSDKSelfContained requires a supported Windows architecture".
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Purpose |
|
||||
|----------|---------|
|
||||
| `OPENCLAW_FORCE_ONBOARDING=1` | Show onboarding wizard even if a token already exists |
|
||||
| `OPENCLAW_SKIP_UPDATE_CHECK=1` | Skip the update dialog (useful during testing) |
|
||||
| `OPENCLAW_LANGUAGE=fr-fr` | Override UI language (validated: en-us, fr-fr, nl-nl, zh-cn, zh-tw) |
|
||||
| `OPENCLAW_GATEWAY_PORT=19001` | Override default gateway port for local dev |
|
||||
| `OPENCLAW_VISUAL_TEST=1` | Enable automatic screenshot capture on page transitions |
|
||||
| `OPENCLAW_VISUAL_TEST_DIR=path` | Output directory for visual test screenshots |
|
||||
|
||||
### Testing the Wizard Locally
|
||||
|
||||
1. Start a local gateway (e.g., in WSL): `cd ~/openclaw && npx openclaw gateway`
|
||||
2. Set env vars:
|
||||
```powershell
|
||||
$env:OPENCLAW_FORCE_ONBOARDING = "1"
|
||||
$env:OPENCLAW_SKIP_UPDATE_CHECK = "1"
|
||||
```
|
||||
3. Build and run: `./build.ps1 -Project WinUI` then launch the exe
|
||||
4. Navigate through all 6 screens to verify
|
||||
|
||||
### Architecture
|
||||
|
||||
- **FunctionalUI**: `src/OpenClawTray.FunctionalUI/` — Minimal declarative WinUI helper layer used by onboarding
|
||||
- **Pages**: `src/OpenClaw.Tray.WinUI/Onboarding/Pages/` — Functional UI components for each wizard screen
|
||||
- **Services**: `src/OpenClaw.Tray.WinUI/Onboarding/Services/` — State management, setup code decoder, permission checker, health check, input validation
|
||||
- **Widgets**: `src/OpenClaw.Tray.WinUI/Onboarding/Widgets/` — Shared UI components (cards, step indicators, feature rows)
|
||||
- **Window**: `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs` — Host window with WebView2 overlay for chat
|
||||
- **Helpers**: `src/OpenClaw.Tray.WinUI/Helpers/GatewayChatHelper.cs` — Shared WebView2 chat URL builder
|
||||
|
||||
---
|
||||
|
||||
*Made with 🦞 love by Scott Hanselman and the OpenClaw community*
|
||||
|
||||
123
README.md
123
README.md
@ -98,14 +98,14 @@ Modern Windows 11-style system tray companion that connects to your local OpenCl
|
||||
- 🌐 **Web Chat** - Embedded chat window with WebView2
|
||||
- 📊 **Live Status** - Real-time sessions, channels, and usage display
|
||||
- 🧭 **Command Center** - Dense gateway, channel, usage, node, pairing, and allowlist diagnostics from one window
|
||||
- ⚡ **Activity Stream** - Command Center page for live session, usage, node, and notification events
|
||||
- ⚡ **Activity Stream** - Dedicated flyout for live session, usage, node, and notification events
|
||||
- 🔔 **Toast Notifications** - Clickable Windows notifications with [smart categorization](docs/NOTIFICATION_CATEGORIZATION.md)
|
||||
- 📡 **Channel Control** - Start/stop Telegram & WhatsApp from the menu
|
||||
- 🖥️ **Node Observability** - Node inventory with online/offline state and copyable summary
|
||||
- ⏱ **Cron Jobs** - Quick access to scheduled tasks
|
||||
- 🚀 **Auto-start** - Launch with Windows
|
||||
- ⚙️ **Settings** - Full configuration page
|
||||
- 🎯 **First-run onboarding** — 6-screen setup wizard (connection, permissions, chat, configuration)
|
||||
- ⚙️ **Settings** - Full configuration dialog
|
||||
- 🎯 **First-run experience** - Welcome dialog guides new users
|
||||
|
||||
#### Quick Send scope requirement
|
||||
|
||||
@ -123,15 +123,14 @@ If Quick Send fails with `pairing required` / `NOT_PAIRED`, that is a **device a
|
||||
|
||||
### Menu Sections
|
||||
- **Status** - Gateway connection status with click-to-view details
|
||||
- **Command Center** - Hub with diagnostics, channel health, usage, sessions, nodes, and copyable repair commands
|
||||
- **Command Center** - Status detail window with diagnostics, channel health, usage, sessions, nodes, and copyable repair commands
|
||||
- **Sessions** - Active agent sessions with preview and per-session controls
|
||||
- **Usage** - Provider/cost summary with quick jump to activity details
|
||||
- **Channels** - Telegram/WhatsApp status with toggle control
|
||||
- **Nodes** - Online/offline node inventory and copyable summary
|
||||
- **Recent Activity** - Timestamped event stream for sessions, usage, nodes, and notifications
|
||||
- **Actions** - Dashboard, Web Chat, Quick Send, Activity Stream, History
|
||||
- **Support & Debug** - Logs, config, diagnostics folder, redacted support context, browser setup, port/capability/node/channel/activity summaries, and managed SSH tunnel restart
|
||||
- **Settings** - Configuration and auto-start
|
||||
- **Settings** - Configuration, auto-start, logs
|
||||
|
||||
### Mac Parity Status
|
||||
|
||||
@ -141,11 +140,11 @@ Comparing against [openclaw-menubar](https://github.com/magimetal/openclaw-menub
|
||||
|---------|-----|---------|-------|
|
||||
| Menu bar/tray icon | ✅ | ✅ | Color-coded status |
|
||||
| Gateway status display | ✅ | ✅ | Connected/Disconnected |
|
||||
| PID display | ✅ | ✅ | Command Center shows gateway listener process/PID |
|
||||
| PID display | ✅ | ❌ | Mac shows gateway PID |
|
||||
| Channel status | ✅ | ✅ | Mac: Discord / Win: Telegram+WhatsApp |
|
||||
| Sessions count | ✅ | ✅ | |
|
||||
| Last check timestamp | ✅ | ✅ | Shown in tray tooltip |
|
||||
| Gateway start/stop/restart | ✅ | ⚠️ | Windows can restart the managed SSH tunnel from tray Support & Debug and Command Center; external gateway process control is not implemented |
|
||||
| Gateway start/stop/restart | ✅ | ❌ | Mac controls gateway process |
|
||||
| View Logs | ✅ | ✅ | |
|
||||
| Open Web UI | ✅ | ✅ | |
|
||||
| Refresh | ✅ | ✅ | Auto-refresh on menu open |
|
||||
@ -164,7 +163,7 @@ These features are available in Windows but not in the Mac app:
|
||||
| Channel control | Start/stop Telegram & WhatsApp |
|
||||
| Modern flyout menu | Windows 11-style with dark/light mode |
|
||||
| Deep links | `openclaw://` URL scheme with IPC |
|
||||
| First-run onboarding | 6-screen guided setup wizard (Welcome → Connection → Wizard → Permissions → Chat → Ready) |
|
||||
| First-run welcome | Guided onboarding for new users |
|
||||
| PowerToys integration | Command Palette extension |
|
||||
|
||||
### 🔌 Node Mode (Agent Control)
|
||||
@ -177,12 +176,8 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t
|
||||
| **Canvas** | `canvas.present`, `canvas.hide`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.push`, `canvas.a2ui.pushJSONL`, `canvas.a2ui.reset` | Display and control a WebView2 window |
|
||||
| **Screen** | `screen.snapshot`, `screen.record` | Capture screenshots and fixed-duration MP4 screen recordings |
|
||||
| **Camera** | `camera.list`, `camera.snap`, `camera.clip` | Enumerate cameras and capture still photos or short video clips |
|
||||
| **Speech-to-text** | `stt.transcribe` | Capture audio from the default microphone for a bounded duration and return transcribed text. Default-off; opt-in via Settings. When enabled, advertised to both gateway callers (subject to gateway allowlist) and local MCP clients (subject to bearer token). |
|
||||
| **Location** | `location.get` | Return Windows geolocation when permission is available |
|
||||
| **Device** | `device.info`, `device.status` | Return Windows host/app metadata and lightweight status |
|
||||
| **Text-to-speech** | `tts.speak` | Speak text aloud through Windows speech synthesis, or ElevenLabs when configured |
|
||||
|
||||
Packaged installs declare camera, microphone, and location capabilities. Windows may ask for consent the first time a node capability uses one of those protected resources.
|
||||
|
||||
#### Node Setup
|
||||
|
||||
@ -209,24 +204,23 @@ Packaged installs declare camera, microphone, and location capabilities. Windows
|
||||
"canvas.hide",
|
||||
"canvas.navigate",
|
||||
"canvas.eval",
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
"screen.snapshot",
|
||||
"camera.list",
|
||||
"camera.snap",
|
||||
"camera.clip",
|
||||
"location.get",
|
||||
"device.info",
|
||||
"device.status",
|
||||
"tts.speak"
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
"screen.snapshot",
|
||||
"camera.list",
|
||||
"camera.snap",
|
||||
"camera.clip",
|
||||
"location.get",
|
||||
"device.info",
|
||||
"device.status"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
> ⚠️ **Important**: The gateway has a server-side allowlist. Commands must be listed explicitly - wildcards like `canvas.*` don't work! Privacy-sensitive commands such as `screen.record` and agent-driven audio playback via `tts.speak` should only be added to `allowCommands` when you explicitly want to allow them.
|
||||
> ⚠️ **Important**: The gateway has a server-side allowlist. Commands must be listed explicitly - wildcards like `canvas.*` don't work! Privacy-sensitive commands such as `screen.record` should only be added to `allowCommands` when you explicitly want to allow them.
|
||||
|
||||
5. **Test it** from your Mac/gateway:
|
||||
```bash
|
||||
@ -254,9 +248,6 @@ Packaged installs declare camera, microphone, and location capabilities. Windows
|
||||
# Take a photo (NV12/MediaCapture fallback)
|
||||
openclaw nodes invoke --node <id> --command camera.snap --params '{"deviceId":"<device-id>","format":"jpeg","quality":80}'
|
||||
|
||||
# Speak text aloud on the Windows node (requires TTS enabled in Settings and tts.speak allowed on the gateway)
|
||||
openclaw nodes invoke --node <id> --command tts.speak --params '{"text":"Hello from OpenClaw","provider":"windows"}'
|
||||
|
||||
# Execute a command on the Windows node
|
||||
openclaw nodes invoke --node <id> --command system.run --params '{"command":"Get-Process | Select -First 5","shell":"powershell","timeoutMs":10000}'
|
||||
|
||||
@ -302,35 +293,12 @@ OpenClaw registers the `openclaw://` URL scheme for automation and integration:
|
||||
|
||||
| Link | Description |
|
||||
|------|-------------|
|
||||
| `openclaw://settings` | Open the Settings page |
|
||||
| `openclaw://setup` | Open Setup Wizard |
|
||||
| `openclaw://chat` | Open the Chat page |
|
||||
| `openclaw://commandcenter` | Open Command Center diagnostics |
|
||||
| `openclaw://activity` | Open the Activity page |
|
||||
| `openclaw://history` | Open the Activity page filtered to notification history |
|
||||
| `openclaw://settings` | Open Settings dialog |
|
||||
| `openclaw://chat` | Open Web Chat window |
|
||||
| `openclaw://dashboard` | Open Dashboard in browser |
|
||||
| `openclaw://dashboard/sessions` | Open specific dashboard page |
|
||||
| `openclaw://dashboard/channels` | Open Channels dashboard page |
|
||||
| `openclaw://dashboard/skills` | Open Skills dashboard page |
|
||||
| `openclaw://dashboard/cron` | Open Cron dashboard page |
|
||||
| `openclaw://healthcheck` | Run a manual health check |
|
||||
| `openclaw://check-updates` | Run a manual update check |
|
||||
| `openclaw://logs` | Open the current tray log file |
|
||||
| `openclaw://log-folder` | Open the logs folder |
|
||||
| `openclaw://config` | Open the config folder |
|
||||
| `openclaw://diagnostics` | Open the diagnostics JSONL folder |
|
||||
| `openclaw://support-context` | Copy redacted support context |
|
||||
| `openclaw://debug-bundle` | Copy a combined debug bundle for support |
|
||||
| `openclaw://browser-setup` | Copy browser.proxy/browser-control setup guidance |
|
||||
| `openclaw://port-diagnostics` | Copy gateway/browser/tunnel port diagnostics with owner PID stop hints |
|
||||
| `openclaw://capability-diagnostics` | Copy permissions, allowlist, and parity diagnostics |
|
||||
| `openclaw://node-inventory` | Copy node capabilities, commands, and policy status |
|
||||
| `openclaw://channel-summary` | Copy channel health and start/stop availability |
|
||||
| `openclaw://activity-summary` | Copy recent tray activity for troubleshooting |
|
||||
| `openclaw://extensibility-summary` | Copy channel, skills, and cron dashboard surface guidance |
|
||||
| `openclaw://restart-ssh-tunnel` | Restart the tray-managed SSH tunnel when enabled |
|
||||
| `openclaw://send?message=Hello` | Open Quick Send with pre-filled text |
|
||||
| `openclaw://agent?message=Hello` | Send message directly to the connected gateway |
|
||||
| `openclaw://agent?message=Hello` | Send message directly (with confirmation) |
|
||||
|
||||
Deep links work even when Molty is already running - they're forwarded via IPC.
|
||||
|
||||
@ -340,30 +308,9 @@ PowerToys Command Palette extension for quick OpenClaw access.
|
||||
|
||||
### Commands
|
||||
- **🦞 Open Dashboard** - Launch the OpenClaw web dashboard
|
||||
- **💬 Dashboard: Sessions** - Open the sessions dashboard
|
||||
- **📡 Dashboard: Channels** - Open the channel configuration dashboard
|
||||
- **🧩 Dashboard: Skills** - Open the skills dashboard
|
||||
- **⏱️ Dashboard: Cron** - Open the scheduled jobs dashboard
|
||||
- **💬 Web Chat** - Open the embedded Chat page
|
||||
- **💬 Web Chat** - Open the embedded Web Chat window
|
||||
- **📝 Quick Send** - Open the Quick Send dialog to compose a message
|
||||
- **🧭 Setup Wizard** - Open pairing/setup
|
||||
- **🧭 Command Center** - Open diagnostics and support actions
|
||||
- **🔄 Run Health Check** - Refresh connection health
|
||||
- **⬇️ Check for Updates** - Run a manual GitHub Releases update check
|
||||
- **⚡ Activity Stream** - Open recent activity
|
||||
- **📋 Notification History** - Open notification history in the Activity page
|
||||
- **⚙️ Settings** - Open the OpenClaw Tray Settings page
|
||||
- **📄 Open Log File / 📁 Logs / 🗂️ Config / 🧪 Diagnostics** - Open support files and folders
|
||||
- **📋 Copy Support Context** - Copy redacted Command Center metadata
|
||||
- **🧰 Copy Debug Bundle** - Copy combined support, port, capability, node, channel, and activity diagnostics
|
||||
- **🌐 Copy Browser Setup** - Copy browser.proxy and node-host setup guidance
|
||||
- **🔌 Copy Port Diagnostics** - Copy gateway/browser/tunnel port owners and stop hints
|
||||
- **🛡️ Copy Capability Diagnostics** - Copy permission, allowlist, and parity diagnostics
|
||||
- **🖥️ Copy Node Inventory** - Copy node capabilities, commands, and policy status
|
||||
- **📡 Copy Channel Summary** - Copy channel health and start/stop availability
|
||||
- **⚡ Copy Activity Summary** - Copy recent tray activity
|
||||
- **🧩 Copy Extensibility Summary** - Copy channel, skills, and cron surface guidance
|
||||
- **🔁 Restart SSH Tunnel** - Restart the tray-managed SSH tunnel when enabled
|
||||
- **⚙️ Settings** - Open the OpenClaw Tray Settings dialog
|
||||
|
||||
### Installation
|
||||
1. Run the OpenClaw Tray installer and tick **"Install PowerToys Command Palette extension"**, or
|
||||
@ -394,7 +341,7 @@ openclaw-windows-node/
|
||||
│ └── OpenClaw.Tray.Tests/ # Tray app helper tests
|
||||
├── docs/
|
||||
│ └── molty1.png # Screenshot
|
||||
├── openclaw-windows-node.slnx # Solution file
|
||||
├── moltbot-windows-hub.slnx # Solution file (historical name)
|
||||
├── README.md
|
||||
├── LICENSE
|
||||
└── .gitignore
|
||||
@ -410,16 +357,10 @@ Default gateway: `ws://localhost:18789`
|
||||
|
||||
### First Run
|
||||
|
||||
On first run, Molty launches a guided onboarding wizard that walks you through setup:
|
||||
|
||||
1. **Welcome** — introduces OpenClaw and starts the setup flow
|
||||
2. **Connection** — choose Local gateway, Remote gateway, or configure later. Paste a setup code or enter gateway URL and token manually. Tests the connection with Ed25519 device authentication.
|
||||
3. **Wizard** — gateway-driven configuration steps (AI provider selection, personality setup, communication channels). Steps are defined by your gateway.
|
||||
4. **Permissions** — reviews Windows system permissions (notifications, camera, microphone, screen capture, location) and links to system settings to grant them.
|
||||
5. **Chat** — meet your agent in a live chat powered by the gateway's web UI.
|
||||
6. **Ready** — summary of available features, option to launch at startup, and a Finish button.
|
||||
|
||||
For detailed setup instructions, see [docs/SETUP.md](docs/SETUP.md). For the full onboarding architecture, see [docs/ONBOARDING_WIZARD.md](docs/ONBOARDING_WIZARD.md).
|
||||
On first run without a token, Molty displays a welcome dialog that:
|
||||
1. Explains what's needed to get started
|
||||
2. Links to [documentation](https://docs.molt.bot/web/dashboard) for token setup
|
||||
3. Opens Settings to configure the connection
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@ -23,7 +23,7 @@
|
||||
#>
|
||||
|
||||
param(
|
||||
[ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette", "Cli", "WinNodeCli")]
|
||||
[ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette", "Cli")]
|
||||
[string]$Project = "All",
|
||||
|
||||
[ValidateSet("Debug", "Release")]
|
||||
@ -188,13 +188,12 @@ function Build-Project($name, $path, $useRid = $false) {
|
||||
$projects = @{
|
||||
"Shared" = @{ Path = "src/OpenClaw.Shared/OpenClaw.Shared.csproj"; UseRid = $false }
|
||||
"Cli" = @{ Path = "src/OpenClaw.Cli/OpenClaw.Cli.csproj"; UseRid = $false }
|
||||
"WinNodeCli" = @{ Path = "src/OpenClaw.WinNode.Cli/OpenClaw.WinNode.Cli.csproj"; UseRid = $false }
|
||||
"Tray" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true }
|
||||
"WinUI" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true }
|
||||
"CommandPalette" = @{ Path = "src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj"; UseRid = $false }
|
||||
}
|
||||
|
||||
$toBuild = if ($Project -eq "All") { @("Shared", "Cli", "WinNodeCli", "WinUI") } else { @($Project) }
|
||||
$toBuild = if ($Project -eq "All") { @("Shared", "Cli", "WinUI") } else { @($Project) }
|
||||
|
||||
# Always build Shared first if building other projects
|
||||
if ($Project -ne "Shared" -and $Project -ne "All" -and $toBuild -notcontains "Shared") {
|
||||
|
||||
@ -1,300 +0,0 @@
|
||||
# Native WinUI A2UI Canvas — Design Spec
|
||||
|
||||
> **Status:** Draft / proposal
|
||||
> **Audience:** Contributors implementing a native A2UI renderer for the Windows node
|
||||
> **Target version:** A2UI v0.8 (parity with current openclaw clients), with a v0.9 migration path
|
||||
|
||||
## 1. Motivation
|
||||
|
||||
Today the Windows node renders A2UI by hosting a WebView2 control (`CanvasWindow`) that navigates to an HTTP page served by the openclaw gateway at `/__openclaw__/a2ui/`. That page bundles `@a2ui/lit` and openclaw's bridge JS. Pushed messages travel `agent → gateway → node (canvas.a2ui.push) → WebView2 → window.__a2ui.receive(msg)`.
|
||||
|
||||
That works, but it has costs:
|
||||
|
||||
- **Hard gateway dependency.** A node running in MCP-only mode (no gateway connection) silently drops A2UI pushes — `OnCanvasA2UIPush` bails when `_a2uiHostUrl` is null. The renderer code physically lives at the gateway.
|
||||
- **WebView2 surface area.** Drag/drop, IME, accessibility, focus, DPI, and keyboard shortcuts inherit WebView2 quirks instead of XAML's native behavior. The canvas always feels like an embedded browser.
|
||||
- **Bootstrapping latency.** Each cold start has to ensure WebView2 is ready, navigate, and wait for `window.__a2ui` to register before any message can be delivered (`EnsureA2UIHostAsync` + `ensureA2uiReady` polling).
|
||||
- **Theming drift.** WinUI windows around the canvas use Mica/Fluent; the canvas uses Lit components styled with CSS. Achieving consistent visuals requires duplicate theme work.
|
||||
- **Hardening.** Surface area for arbitrary script execution remains larger than necessary for what is fundamentally a declarative UI tree.
|
||||
|
||||
A native WinUI renderer renders A2UI surfaces directly into XAML — no WebView, no HTTP host, no JS bridge. The node becomes self-contained: it can render A2UI whether it's connected to a gateway, an MCP client, or both.
|
||||
|
||||
## 2. Goals & non-goals
|
||||
|
||||
### Goals
|
||||
|
||||
- **Render A2UI v0.8 standard-catalog surfaces natively** in the Windows node using WinUI 3 / XAML controls.
|
||||
- **Preserve the existing wire protocol.** Agents continue to send A2UI JSONL via `canvas.a2ui.push` / `canvas.a2ui.reset`. Nothing about the agent side changes.
|
||||
- **Work offline / gateway-less.** A WSL-less, gateway-less Windows node can still display rich UI from an MCP client.
|
||||
- **Match Fluent / WinUI design language** by default; allow theme overrides from the surface payload.
|
||||
- **Stream incremental updates** without flicker (component adds/updates/deletes mid-task).
|
||||
|
||||
### Non-goals (initial release)
|
||||
|
||||
- No A2UI v0.9 features (bidirectional messaging, prompt-first generation, modular schemas).
|
||||
- No HTML/JS/CSS escape hatch from inside an A2UI surface (the v0.8 catalog has no such primitive — keep it that way).
|
||||
- No replacement for `canvas.present` / `canvas.navigate` / `canvas.eval`. Those continue to use WebView2 for general web content. Only A2UI rendering moves.
|
||||
- No custom (non-catalog) component types in v1. Catalog-strict.
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
### 3.1 Boundary
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ OpenClaw.Tray.WinUI (existing) │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ NodeService │ │
|
||||
│ │ CanvasCapability (existing) │ │
|
||||
│ │ ├─ canvas.a2ui.push ──► A2UIPushRequested ─┐ │ │
|
||||
│ │ └─ canvas.a2ui.reset ──► A2UIResetRequested┐│ │ │
|
||||
│ │ ││ │ │
|
||||
│ │ OnCanvasA2UIPush / OnCanvasA2UIReset (existing)││ │ │
|
||||
│ │ dispatched to UI thread, route to: ││ │ │
|
||||
│ └─────────────────────────────────────────────────││──┘ │
|
||||
│ ▼▼ │
|
||||
│ ┌─────────────────────────────────────────────────────┐ │
|
||||
│ │ A2UICanvasWindow (new) ─ replaces WebView2 path │ │
|
||||
│ │ ├─ A2UIRouter (parses & dispatches msgs) │ │
|
||||
│ │ ├─ SurfaceHost x N (one per createSurface) │ │
|
||||
│ │ │ └─ ComponentTree (XAML) │ │
|
||||
│ │ ├─ DataModelStore (per surface) │ │
|
||||
│ │ ├─ ActionDispatcher (UI events → ws/MCP) │ │
|
||||
│ │ └─ ThemeProvider (Fluent + payload overrides) │ │
|
||||
│ └─────────────────────────────────────────────────────┘ │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
The existing `CanvasCapability` and the events it raises (`A2UIPushRequested`, `A2UIResetRequested`) are unchanged. `NodeService.OnCanvasA2UIPush` no longer calls `EnsureA2UIHostAsync` / `SendA2UIMessageAsync` against a WebView2; it instead hands the JSONL to a new `A2UICanvasWindow` (or the existing `CanvasWindow` if we choose to host both renderers).
|
||||
|
||||
### 3.2 Coexistence with WebView2 canvas
|
||||
|
||||
Two canvas modes share the surface:
|
||||
|
||||
| Mode | Trigger | Window |
|
||||
|---|---|---|
|
||||
| Web (`canvas.present` / `canvas.navigate` / `canvas.eval`) | URL or HTML payload | `CanvasWindow` (WebView2) — unchanged |
|
||||
| A2UI native | First `canvas.a2ui.push` since reset | `A2UICanvasWindow` (XAML) — new |
|
||||
|
||||
A user-visible toggle is *not* required — the choice is implicit in which MCP command the agent calls. The two windows must not compete for focus; if both want to be visible, the most-recently-targeted wins (last-write-wins, with a small fade between).
|
||||
|
||||
### 3.3 Component pipeline
|
||||
|
||||
```
|
||||
JSONL line
|
||||
→ System.Text.Json deserialize → A2UIMessage (sealed record hierarchy)
|
||||
→ A2UIRouter.Dispatch(message)
|
||||
├─ CreateSurface → SurfaceHost.Create(surfaceId, catalogId, theme)
|
||||
├─ UpdateComponents → SurfaceHost.ApplyComponents(adjacencyList)
|
||||
├─ UpdateDataModel → DataModelStore.Apply(surfaceId, patch)
|
||||
└─ DeleteSurface → SurfaceHost.Dispose(surfaceId)
|
||||
→ SurfaceHost rebuilds/patches its XAML subtree on the UI thread
|
||||
→ DataModel changes notify bound components via INotifyPropertyChanged
|
||||
```
|
||||
|
||||
Component identity is by **string ID**. A `LogicalTreeBuilder` keeps an `IDictionary<string, FrameworkElement>` per surface so `updateComponents` can mutate in place without rebuilding the entire tree (avoids flicker, preserves focus and scroll position).
|
||||
|
||||
## 4. Wire protocol
|
||||
|
||||
### 4.1 Inbound (agent → node)
|
||||
|
||||
Use the existing capability commands verbatim. No protocol change is required for this work.
|
||||
|
||||
```json
|
||||
{ "version": "v0.8", "createSurface": { "surfaceId": "main", "catalogId": "https://a2ui.org/specification/v0_8/standard_catalog.json", "sendDataModel": true } }
|
||||
{ "updateComponents": { "surfaceId": "main", "components": [ { "id": "root", "componentName": "Column", "properties": {...}, "children": ["title","actions"] }, ... ] } }
|
||||
{ "updateDataModel": { "surfaceId": "main", "patch": { "/userName": "Scott" } } }
|
||||
{ "deleteSurface": { "surfaceId": "main" } }
|
||||
```
|
||||
|
||||
The renderer SHOULD validate each line against the v0.8 envelope schema before dispatch. The schema lives at `vendor/a2ui/specification/0.8/json/server_to_client.json` in the openclaw repo and should be vendored into `OpenClaw.Shared/Schemas/A2UI_v0_8/`.
|
||||
|
||||
Unknown envelope keys → log + skip (do not throw). Unknown component types → render an `A2UIUnknown` placeholder showing the type name and a warning glyph; never crash.
|
||||
|
||||
### 4.2 Outbound (node → agent)
|
||||
|
||||
When the user interacts with a surface, the renderer raises an A2UI **action** event. Action delivery rides whichever transport the node is currently connected through:
|
||||
|
||||
- Gateway-connected: serialize as the v0.8 client→server envelope and ship over the existing WebSocket via `_nodeClient`.
|
||||
- MCP-only: emit as an MCP notification on a new `canvas/a2ui/action` channel (to be added to `CanvasCapability`).
|
||||
|
||||
Action payload shape (v0.8):
|
||||
|
||||
```json
|
||||
{
|
||||
"action": {
|
||||
"name": "primary",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn_submit",
|
||||
"timestamp": "2026-04-25T18:32:11.123Z",
|
||||
"context": { "/email": "user@example.com" }
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`context` is the (possibly partial) data model snapshot relevant to the source component. It is computed by walking the component's `dataBinding` paths, taking the surface's `sendDataModel` flag into account.
|
||||
|
||||
## 5. Component mapping (v0.8 standard catalog)
|
||||
|
||||
| A2UI component | WinUI 3 control | Notes |
|
||||
|---|---|---|
|
||||
| **Containers** | | |
|
||||
| `Row` | `StackPanel` (Horizontal) inside a wrap-aware `ItemsRepeater` when `wrap=true` | Match `bootstrap.js`'s wrap behavior at < 860 px |
|
||||
| `Column` | `StackPanel` (Vertical) | `min-width: 0` analog: clamp via `MinWidth=0` |
|
||||
| `List` | `ItemsRepeater` + `ItemsRepeaterScrollHost` | Virtualization on by default |
|
||||
| `Card` | `Border` with `Microsoft.UI.Xaml.Media.MicaBackdrop`-aware brush + corner radius + drop shadow | |
|
||||
| `Tabs` | `TabView` (controls) | Lightweight chrome to match Lit version |
|
||||
| `Modal` | `ContentDialog` (or full-window overlay `Grid` w/ `AcrylicBrush`) | Track Lit's full-screen overlay style — `dialog::backdrop` analog is `AcrylicBrush` over the parent |
|
||||
| **Display** | | |
|
||||
| `Text` | `TextBlock` | Map A2UI `style` (h1/h2/body/caption/etc.) to Fluent type ramp |
|
||||
| `Image` | `Image` w/ `BitmapImage` source; HTTP fetch via `HttpClient` with allowlist | Reject `file:`, `javascript:`, `data:` (except small `image/png\|jpeg\|webp`) |
|
||||
| `Icon` | `FontIcon` (Segoe Fluent Icons) keyed by name | Maintain a name→glyph map; missing → outlined question-mark |
|
||||
| `Video` | `MediaPlayerElement` | |
|
||||
| `AudioPlayer` | `MediaPlayerElement` w/ audio-only template | |
|
||||
| `Divider` | `Rectangle` (1px `SystemControlForegroundBaseLowBrush`) or `MenuFlyoutSeparator` style | |
|
||||
| **Interactive** | | |
|
||||
| `Button` | `Button` (variants → `AccentButtonStyle`, `DefaultButtonStyle`) | Triggers action with `name` |
|
||||
| `CheckBox` | `CheckBox` | Two-way bind to data model path |
|
||||
| `TextField` | `TextBox` (multiline → `TextBox.AcceptsReturn=true`) | `inputType` → `InputScope` mapping |
|
||||
| `DateTimeInput` | `CalendarDatePicker` + `TimePicker` (composed) | |
|
||||
| `ChoicePicker` | `ComboBox` (single) / `ListView` w/ `SelectionMode=Multiple` (multi) | |
|
||||
| `Slider` | `Slider` | |
|
||||
|
||||
Each mapping lives in a single `IComponentRenderer` implementation under `OpenClaw.Tray.WinUI/A2UI/Renderers/`. The set is closed at compile time (catalog-strict) — no runtime registration in v1.
|
||||
|
||||
## 6. Data model & binding
|
||||
|
||||
A2UI surfaces carry a JSON data model. Components reference paths into that model (`"/userName"`, `"/items/0/title"`).
|
||||
|
||||
### 6.1 Storage
|
||||
|
||||
`DataModelStore` holds one `JsonObject` per surface, mutated via JSON Pointer (RFC 6901) patches. Use `System.Text.Json.Nodes` for in-place edits (already a dependency).
|
||||
|
||||
### 6.2 Binding
|
||||
|
||||
Bindings are **one-way for display** components, **two-way for interactive** components. Implement via:
|
||||
|
||||
- `DataModelObservable` — wraps a `JsonObject` and exposes `INotifyPropertyChanged` per registered path.
|
||||
- `A2UIBinding` markup extension (or code-behind helpers) — produces `Binding` objects that target a path observer.
|
||||
|
||||
Why not raw `Microsoft.UI.Xaml.Data.Binding` paths? JSON paths can include array indices and slashes, which XAML binding paths don't model cleanly. A small adapter is simpler and faster than fighting the binding engine.
|
||||
|
||||
### 6.3 Patches
|
||||
|
||||
`updateDataModel.patch` is an object whose keys are JSON Pointer paths and whose values are replacement values. Apply atomically; coalesce notifications so multiple paths in one message produce a single render pass.
|
||||
|
||||
## 7. Action dispatch
|
||||
|
||||
Components that can produce actions register a callback:
|
||||
|
||||
```csharp
|
||||
internal sealed class ButtonRenderer : IComponentRenderer
|
||||
{
|
||||
public FrameworkElement Render(A2UIComponent c, RenderContext ctx)
|
||||
{
|
||||
var btn = new Button { Content = c.GetText("label") };
|
||||
btn.Click += (_, _) => ctx.Actions.Raise(new A2UIAction(
|
||||
name: c.GetString("actionName") ?? "press",
|
||||
surfaceId: ctx.SurfaceId,
|
||||
sourceComponentId: c.Id,
|
||||
context: ctx.DataModel.SnapshotFor(c)));
|
||||
return btn;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`ActionDispatcher.Raise` is the single seam through which actions leave the renderer. It handles:
|
||||
|
||||
1. Throttle/debounce (per `name` + `sourceComponentId`) to suppress double-clicks.
|
||||
2. Serialization to A2UI v0.8 client→server envelope.
|
||||
3. Routing: gateway WS first, then MCP notification, with a fallback queue if neither is available.
|
||||
|
||||
## 8. Theming
|
||||
|
||||
Default to `XamlControlsResources` + Fluent theme colors. The `createSurface.theme` payload may override:
|
||||
|
||||
- `colors`: map onto `ThemeResource` overrides applied to the `SurfaceHost` resource scope (no global mutation).
|
||||
- `typography`: optional font family override; respect Windows accessibility text scaling first.
|
||||
- `radius`, `spacing`: passed through to renderers via `RenderContext`.
|
||||
|
||||
Theme application is local to the surface's visual tree — switching themes between surfaces does not flash the chrome.
|
||||
|
||||
## 9. Lifecycle & hosting
|
||||
|
||||
### 9.1 Window
|
||||
|
||||
`A2UICanvasWindow` extends `Window`:
|
||||
|
||||
- One window total (singleton). Multiple surfaces stack as `TabView` items if more than one is active; a single surface fills the content area.
|
||||
- Title pulls from `createSurface.title` (new optional v0.8 field already used by openclaw) or defaults to "Canvas".
|
||||
- Window chrome: backdrop = `MicaBackdrop` on Win11, `AcrylicBackdrop` fallback.
|
||||
- Persistence: position/size remembered across sessions (per existing `CanvasWindow` settings keys; reuse where possible).
|
||||
|
||||
### 9.2 Threading
|
||||
|
||||
All renderer mutation runs on the UI dispatcher (`DispatcherQueue.GetForCurrentThread()`). The router accepts pushes from any thread and posts via `TryEnqueue`.
|
||||
|
||||
### 9.3 Reset
|
||||
|
||||
`canvas.a2ui.reset` (already wired through `A2UIResetRequested`) → `A2UIRouter.ResetAll()` → dispose every `SurfaceHost`, clear stores, re-show empty placeholder.
|
||||
|
||||
## 10. Security model
|
||||
|
||||
- **Catalog-strict.** Component types are baked in at compile time. There is no JS, no HTML escape, no `eval`. Unknown types render a placeholder.
|
||||
- **URL allowlist for media.** `Image`, `Video`, `AudioPlayer` URL fetches go through a single `MediaResolver` that:
|
||||
- Allows `https://` from a configurable allowlist (default: empty until set by the agent's surface theme/manifest).
|
||||
- Allows `data:image/png|jpeg|webp` up to 2 MiB.
|
||||
- Rejects everything else; renders broken-image glyph.
|
||||
- **Action context scoping.** `context` includes only data model paths the source component declares it reads (`dataBinding`), preventing accidental leak of unrelated form state.
|
||||
- **No file system or process access** from inside a surface. Those go through other capabilities (`system.run`, `screen.*`) which already have their own approval flow.
|
||||
- **Logging.** Each inbound message is logged at Info with surface ID + component count; PII fields in the data model SHOULD be redacted at log time using a path denylist (`/password`, `/secret*`, `/token`).
|
||||
|
||||
## 11. Telemetry
|
||||
|
||||
Mirror what `CanvasCapability` already logs:
|
||||
|
||||
- `a2ui.push` (count, jsonl byte length, surface IDs touched, render time ms)
|
||||
- `a2ui.action` (surface ID, action name, queue latency)
|
||||
- `a2ui.unknown_component` (type name) — to drive catalog upgrades
|
||||
- `a2ui.media_blocked` (URL scheme/host) — to tune the allowlist
|
||||
|
||||
## 12. Testing
|
||||
|
||||
- **Unit:** schema validation, JSON pointer apply, action serialization, component-to-XAML mapping per type.
|
||||
- **Visual regression:** golden images per component using WinAppDriver or a snapshot harness — gate on hash + tolerance.
|
||||
- **Spec conformance:** drive the renderer with the official v0.8 conformance fixtures from `vendor/a2ui/specification/0.8/eval/` (reused from the openclaw monorepo) and assert action outputs match expected.
|
||||
- **Stress:** 10k component surface, 1k updateComponents/sec → renderer must not block the UI thread > 16 ms p95.
|
||||
- **Parity:** record the JSONL stream of an existing Lit-rendered openclaw surface, replay through the WinUI renderer, diff screenshots.
|
||||
|
||||
## 13. Phasing
|
||||
|
||||
| Phase | Scope | Exit criteria |
|
||||
|---|---|---|
|
||||
| **0 — Spike** | `Text`, `Column`, `Button` only; one surface; no data model | Single button click round-trips to agent |
|
||||
| **1 — Catalog parity** | All v0.8 standard catalog types; data model + bindings; modal/tabs | Full conformance fixtures pass |
|
||||
| **2 — Polish** | Theming, transitions, focus management, accessibility (Narrator), keyboard nav | A11y audit clean; UX review against Lit version |
|
||||
| **3 — Coexistence** | Native window default; WebView2 path retained behind `--canvas=web` flag for parity testing | No regressions in WebView2 path |
|
||||
| **4 — v0.9 migration** | Bidirectional messages, modular schemas, prompt-first | Tracks Google A2UI v0.9 release |
|
||||
|
||||
## 14. Open questions
|
||||
|
||||
> Resolved 2026-04-27 — see decisions below; previous wording preserved for context.
|
||||
|
||||
1. **Window count.** One A2UI window with tabs for multiple surfaces, or one window per surface? Lit version uses one host with multiple stacked surfaces.
|
||||
**Decision:** stay with the Lit-compatible single-window-with-tabs layout. Multiple windows is out of scope for v1.
|
||||
2. **Component overrides.** Should we expose a hook for downstream apps to swap in custom renderers?
|
||||
**Decision:** stay catalog-strict for v1. No extension seam yet — easy to add later if a real customer asks.
|
||||
3. **Theme negotiation.** Should the agent be told "I'm a native WinUI client, prefer Fluent tokens" via `clientCapabilities`?
|
||||
**Decision:** yes — advertise Fluent token preference in `clientCapabilities`. (Tracking task: wire this into the capability summary returned by `canvas.caps`.)
|
||||
4. **Animation budget.** Define a small transition set (fade, slide) and apply automatically, or stay still?
|
||||
**Decision:** stay still until the agent asks. No automatic transitions in v1.
|
||||
5. **Image caching.** Per-surface, per-process, or persistent?
|
||||
**Decision:** per-process LRU. Avoids the repeated-fetch cost of per-surface and the staleness risk of persistent disk caching.
|
||||
|
||||
## 15. References
|
||||
|
||||
- A2UI v0.8 spec: <https://a2ui.org/specification/v0.8-a2ui/>
|
||||
- v0.8 JSON schemas (vendored): `openclaw/vendor/a2ui/specification/0.8/json/`
|
||||
- Reference Lit renderer: `openclaw/vendor/a2ui/renderers/lit/`
|
||||
- Current Windows node A2UI bridge: `src/OpenClaw.Tray.WinUI/Windows/CanvasWindow.xaml.cs` (`EnsureA2UIHostAsync`, `BuildA2UIMessageScript`)
|
||||
- Current capability surface: `src/OpenClaw.Shared/Capabilities/CanvasCapability.cs`
|
||||
- Android handler (good reference for v0.8 validation rules): `openclaw/apps/android/.../A2UIHandler.kt`
|
||||
@ -7,10 +7,7 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa
|
||||
| Language | Locale | Resource File |
|
||||
|----------|--------|---------------|
|
||||
| English (US) | `en-us` | `Strings/en-us/Resources.resw` |
|
||||
| French (France) | `fr-fr` | `Strings/fr-fr/Resources.resw` |
|
||||
| Dutch (Netherlands) | `nl-nl` | `Strings/nl-nl/Resources.resw` |
|
||||
| Chinese (Simplified) | `zh-cn` | `Strings/zh-cn/Resources.resw` |
|
||||
| Chinese (Traditional) | `zh-tw` | `Strings/zh-tw/Resources.resw` |
|
||||
|
||||
## Adding a New Language
|
||||
|
||||
@ -46,7 +43,7 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa
|
||||
|
||||
5. **Do not translate resource key names** (the `name` attribute). Only translate `<value>` content.
|
||||
|
||||
6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system and localization tests automatically discover new locale folders.
|
||||
6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system automatically discovers new locale folders.
|
||||
|
||||
## How It Works
|
||||
|
||||
@ -68,16 +65,16 @@ Windows picks the language automatically based on the user's OS display language
|
||||
|
||||
## Testing a Language Locally
|
||||
|
||||
Set the `OPENCLAW_LANGUAGE` environment variable before launching the app:
|
||||
To test a specific locale without changing your Windows language:
|
||||
|
||||
```powershell
|
||||
$env:OPENCLAW_LANGUAGE = "fr-fr" # or nl-nl, zh-cn, zh-tw
|
||||
.\src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\win-x64\OpenClaw.Tray.WinUI.exe
|
||||
```
|
||||
1. Open `src/OpenClaw.Tray.WinUI/App.xaml.cs`
|
||||
2. Add this line at the top of the `App()` constructor, **before** `InitializeComponent()`:
|
||||
```csharp
|
||||
LocalizationHelper.SetLanguageOverride("zh-CN");
|
||||
```
|
||||
3. Build and run (`dotnet build src/OpenClaw.Tray.WinUI -r win-x64`). Remove the line when done testing.
|
||||
|
||||
This overrides `LocalizationHelper.GetString()` calls for menus, toasts, dialogs, and the onboarding wizard. The language is validated against the supported locale list.
|
||||
|
||||
> **Note:** XAML `x:Uid` bindings follow the OS display language. For full localization testing including XAML elements, change your Windows display language in Settings → Time & Language.
|
||||
> **Note:** This overrides `LocalizationHelper.GetString()` calls (menus, toasts, dialogs, window titles). XAML `x:Uid` bindings follow the OS display language. For full XAML localization testing, change your Windows display language in Settings → Time & Language.
|
||||
|
||||
## Resource Key Naming Conventions
|
||||
|
||||
@ -90,31 +87,12 @@ This overrides `LocalizationHelper.GetString()` calls for menus, toasts, dialogs
|
||||
| `Status_Name` | Status display text | `Status_Connected` |
|
||||
| `TimeAgo_Format` | Relative time strings | `TimeAgo_MinutesFormat` |
|
||||
|
||||
### Onboarding Key Namespace
|
||||
|
||||
All onboarding wizard strings use the `Onboarding_` prefix:
|
||||
|
||||
| Pattern | Used For | Example |
|
||||
|---------|----------|---------|
|
||||
| `Onboarding_PageName_Label` | Page titles, descriptions | `Onboarding_Welcome_Title` |
|
||||
| `Onboarding_Connection_*` | Connection page labels/status | `Onboarding_Connection_TestConnection` |
|
||||
| `Onboarding_Perm_*` | Permission names | `Onboarding_Perm_Camera` |
|
||||
| `Onboarding_Ready_*` | Ready page elements | `Onboarding_Ready_Feature_Voice_Subtitle` |
|
||||
| `Onboarding_Wizard_*` | Wizard page elements | `Onboarding_Wizard_Continue` |
|
||||
|
||||
## Validation
|
||||
|
||||
All resource files must have the **same set of keys**. Locale directories are discovered dynamically under `Strings/`, so adding a new `Strings/<locale>/Resources.resw` file automatically brings it under validation. You can verify counts with:
|
||||
Both resource files must have the **same set of keys**. You can verify with:
|
||||
|
||||
```powershell
|
||||
$base = "src\OpenClaw.Tray.WinUI\Strings"
|
||||
Get-ChildItem $base -Directory | ForEach-Object {
|
||||
$loc = $_.Name
|
||||
$count = (Select-String -Path "$base\$loc\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
Write-Host "$loc : $count keys"
|
||||
}
|
||||
$en = (Select-String -Path "src\OpenClaw.Tray.WinUI\Strings\en-us\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
$new = (Select-String -Path "src\OpenClaw.Tray.WinUI\Strings\<locale>\Resources.resw" -Pattern '<data name="' | Measure-Object).Count
|
||||
Write-Host "en-us: $en keys | <locale>: $new keys | Match: $($en -eq $new)"
|
||||
```
|
||||
|
||||
All locale counts should match. Missing or extra keys indicate an incomplete translation.
|
||||
|
||||
Non-English resource values must also follow the all-or-none rule enforced by `LocalizationValidationTests`: each key is either translated in every non-English locale, intentionally invariant in every non-English locale, or explicitly deferred with rationale. Partial translation, where only some non-English locales differ from `en-us`, is treated as a regression.
|
||||
|
||||
279
docs/MCP_MODE.md
279
docs/MCP_MODE.md
@ -1,279 +0,0 @@
|
||||
# Local MCP Mode
|
||||
|
||||
**Status:** Implemented (initial cut). See `src/OpenClaw.Shared/Mcp/`, `src/OpenClaw.Shared/Mcp/McpHttpServer.cs`, and the Settings UI MCP section.
|
||||
|
||||
## Summary
|
||||
|
||||
The Windows tray app now ships a **local Model Context Protocol (MCP) server** alongside its existing OpenClaw gateway client. The same node capabilities the agent reaches over the OpenClaw gateway WebSocket — `system.run`, `screen.snapshot`, `canvas.*`, `camera.list`, `camera.snap`, `camera.clip`, `location.get`, `tts.speak`, `system.notify`, `system.execApprovals.*` — are advertised, on the same machine, as MCP tools over `http://127.0.0.1:8765/`.
|
||||
|
||||
This means any local MCP client (Claude Desktop, Claude Code, Cursor, an MCP-aware CLI, a custom dev script) can reach into the running tray and drive Windows-native capabilities directly, without an OpenClaw gateway in the loop. The tray app can run in **MCP-only mode** with no gateway connection at all.
|
||||
|
||||
The implementation is structured so that **adding a new node capability automatically exposes it via MCP** — no MCP-side code changes required. That is the central design constraint and the main reason we built MCP in-process rather than as a separate adapter.
|
||||
|
||||
## Goals
|
||||
|
||||
1. **Single source of truth for capabilities.** A new `INodeCapability` registered with `WindowsNodeClient.RegisterCapability(...)` is reachable via every transport the tray supports. Today: gateway WebSocket and local MCP HTTP. Future transports (named pipe, gRPC, whatever) plug in the same way.
|
||||
2. **Local-first development.** Capabilities can be exercised on Windows without standing up an OpenClaw gateway, without an account, without auth, without a tunnel.
|
||||
3. **Make MCP clients first-class consumers** of the OpenClaw native node, not afterthoughts. The tooling investment in capabilities (camera consent flows, exec approval policy, canvas WebView2 plumbing) pays off in both directions: agent-via-gateway and agent-via-local-MCP.
|
||||
|
||||
## Non-goals (for this iteration)
|
||||
|
||||
- **No remote authentication.** Loopback bind + Origin/Host checks keep the endpoint unreachable from any other machine. A local bearer token guards against untrusted local processes on the same box (see [Authentication](#authentication) below). We will revisit ACLs / multi-user when we want remote MCP, multiple users on one box, or shared dev VMs.
|
||||
- **No SSE / streaming.** Plain JSON-RPC request/response is enough for the synchronous capabilities we have today.
|
||||
- **No per-tool input schemas.** Capabilities don't expose schemas; MCP `inputSchema` is permissive (`{type: "object", additionalProperties: true}`). When/if `INodeCapability` grows a schema property, the MCP bridge picks it up with no other changes.
|
||||
- **No port configuration UI.** Default `8765` is hardcoded. Easy to lift into `SettingsManager` later.
|
||||
|
||||
## Architecture
|
||||
|
||||
### Single capability registry, two transports
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ NodeService │
|
||||
│ │
|
||||
│ List<INodeCapability> _capabilities ◄───┐ │
|
||||
│ │ │
|
||||
│ private void Register(INodeCapability) │ │
|
||||
│ { │ │
|
||||
│ _capabilities.Add(cap); │ │
|
||||
│ _nodeClient?.RegisterCapability(cap)│ │
|
||||
│ } │ │
|
||||
└────┬───────────────────────┬──────────────┘─┘
|
||||
│ │
|
||||
│ │
|
||||
▼ ▼
|
||||
┌─────────────────────┐ ┌─────────────────────┐
|
||||
│ WindowsNodeClient │ │ McpToolBridge │
|
||||
│ (gateway WebSocket) │ │ (JSON-RPC dispatch) │
|
||||
└─────────┬───────────┘ └─────────┬───────────┘
|
||||
│ │
|
||||
▼ ▼
|
||||
OpenClaw gateway McpHttpServer
|
||||
(HttpListener@127.0.0.1:8765)
|
||||
│
|
||||
▼
|
||||
Local MCP clients
|
||||
(Claude Code, Cursor, etc.)
|
||||
```
|
||||
|
||||
The capability list lives on `NodeService`, *not* on `WindowsNodeClient`. That single change is what makes MCP-only mode possible: the gateway client is now optional. When it exists, `Register(cap)` pushes capabilities into both the local list and the gateway client's registration message. When it doesn't (MCP-only), capabilities still populate the local list and the MCP bridge serves them.
|
||||
|
||||
### MCP bridge
|
||||
|
||||
`OpenClaw.Shared/Mcp/McpToolBridge.cs` is transport-agnostic JSON-RPC 2.0. It implements:
|
||||
|
||||
- `initialize` — protocol version `2024-11-05`, server info.
|
||||
- `tools/list` — flattens `_capabilities` into MCP tools. Tool name = command name (`"screen.snapshot"`); description = `"{category} capability: {command}"`; `inputSchema` is permissive.
|
||||
- `tools/call` — finds the capability via `INodeCapability.CanHandle(name)`, builds a `NodeInvokeRequest` (the same struct the gateway path uses), calls `ExecuteAsync`, wraps the result as MCP `content[].text`. Tool failures come back as `result.isError = true`, not JSON-RPC errors (per MCP spec — JSON-RPC errors are reserved for protocol issues).
|
||||
- `ping`, `notifications/initialized` — protocol housekeeping.
|
||||
|
||||
The bridge takes a `Func<IReadOnlyList<INodeCapability>>` rather than a snapshot. Every `tools/list` re-reads the live list. This is what guarantees zero-cost capability addition — register a new capability after server start and it appears on the next `tools/list`.
|
||||
|
||||
### HTTP transport
|
||||
|
||||
`OpenClaw.Shared/Mcp/McpHttpServer.cs` is `System.Net.HttpListener` bound to `http://127.0.0.1:8765/`. Loopback-only by construction; not reachable from any other machine even with firewall holes. A defensive `IPAddress.IsLoopback` check on each request acts as belt-and-suspenders.
|
||||
|
||||
`GET /` returns a friendly text probe. `POST /` is JSON-RPC. Anything else → `405`. When a bearer token is configured, every verb must pass the token gate before method dispatch.
|
||||
|
||||
## Authentication
|
||||
|
||||
The HTTP transport requires a bearer token on every request. Defense-in-depth on top of loopback bind + Origin/Host checks: if an attacker can run code in *any* local user context they can reach `127.0.0.1:8765`, so we don't want the listener to be open-by-construction.
|
||||
|
||||
**Where the token lives.** `%APPDATA%\OpenClawTray\mcp-token.txt`. The exact path is composed by `NodeService.McpTokenPath` from `SettingsManager.SettingsDirectoryPath`, so the test-suite override `OPENCLAW_TRAY_DATA_DIR` isolates the token file too. The file inherits the parent directory's ACL — by default only the current user (and SYSTEM/Administrators) can read it.
|
||||
|
||||
**When it's created.** Lazily, on the first `NodeService.StartMcpServer()` call — i.e. the first time the user enables Local MCP Server in Settings and saves. **Until that toggle has been on at least once, the file does not exist.** This trips up users who try to grab the token before flipping the switch.
|
||||
|
||||
**How long it is.** 32 bytes of CSPRNG output, base64url-encoded with padding stripped → **43 ASCII characters** (~256 bits of entropy). See `McpAuthToken.Generate()`.
|
||||
|
||||
**Lifetime.** The token is **persistent across tray restarts**. It's only regenerated if the file is deleted or its contents are emptied. There is no automatic rotation.
|
||||
|
||||
**On the wire.** Every request must carry `Authorization: Bearer <token>` when the server has a configured token. Missing or wrong token → `401 Unauthorized` with no body. `GET /` remains a "yes I'm here" probe after auth passes.
|
||||
|
||||
**How users find it.** Settings → Developer Mode → MCP section shows the live token (masked, with Reveal/Copy buttons) and the storage path. For agents that read from disk (Claude Code, custom scripts), pointing them at `McpTokenPath` is preferable to embedding the token in their prompt or config — the path is stable, the token is a secret. For agents that only accept literal bearer values in config (Claude Desktop, Cursor), use Copy.
|
||||
|
||||
### Settings model
|
||||
|
||||
Two independent toggles in `SettingsData`:
|
||||
|
||||
```csharp
|
||||
public bool EnableNodeMode { get; set; } // open WebSocket to gateway
|
||||
public bool EnableMcpServer { get; set; } // run local MCP HTTP server
|
||||
```
|
||||
|
||||
| `EnableNodeMode` | `EnableMcpServer` | Result |
|
||||
|---|---|---|
|
||||
| off | off | Operator-only (legacy default) |
|
||||
| off | on | **MCP server only, no gateway** |
|
||||
| on | off | Gateway node, no MCP |
|
||||
| on | on | Gateway node + MCP |
|
||||
|
||||
Settings UI exposes both toggles in the Advanced section, with the live MCP endpoint URL and current status (`Listening` / `Stopped — save and restart to start` / `Disabled`).
|
||||
|
||||
A legacy `McpOnlyMode` field is migrated automatically on load and never re-written.
|
||||
|
||||
## Why this matters
|
||||
|
||||
### Testing
|
||||
|
||||
The tray's most interesting code lives in capabilities — `system.run` (LocalCommandRunner + ExecApprovalPolicy), `screen.snapshot` (Windows.Graphics.Capture + GraphicsCapturePicker), `canvas.*` (WebView2 with trusted origin enforcement), `camera.snap`/`camera.clip` (MediaCapture + consent prompt), `location.get` (Windows.Devices.Geolocation). All of that has nontrivial Windows-only behavior and almost none of it is currently exercised end-to-end without first standing up a gateway and authenticating.
|
||||
|
||||
Local MCP changes that. Concrete benefits:
|
||||
|
||||
- **Manual smoke tests in seconds.** `curl -s -X POST http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'` validates that the capability dispatch path works, that the WinUI dispatcher marshaling is correct, and that the result shape matches expectations. No gateway, no gateway token, no SSH tunnel — only the local bearer token.
|
||||
- **Reproducible bug reports.** A repro becomes a `tools/call` body the bug filer can paste verbatim. No "what was the gateway doing at the time."
|
||||
- **Integration tests against a real instance.** A future `tests/integration/` project can spin up the tray in MCP-only mode, fire JSON-RPC, assert results. The test bodies a developer runs by hand are the same ones CI runs. (Harnessing WinUI itself in CI is harder, but the bridge logic — `McpToolBridge` — is already covered by `McpToolBridgeTests` with no UI involvement.)
|
||||
- **Coverage for the dispatch path itself.** `WindowsNodeClient`'s capability-routing logic (`CanHandle` → `ExecuteAsync`) was previously only exercised against a live gateway. The MCP server hits the same code paths, so any local MCP test is implicit coverage of the gateway dispatch.
|
||||
- **Bridge unit tests already exist.** `tests/OpenClaw.Shared.Tests/McpToolBridgeTests.cs` (9 cases) covers initialize, tools/list, runtime capability registration, tool calls, unknown tools, capability failures, JSON-RPC unknown method, notifications, and parse errors. These are pure C# unit tests with fake capabilities — no HTTP, no UI, no gateway.
|
||||
|
||||
### Access from CLIs and agents
|
||||
|
||||
The exact same node tools the OpenClaw gateway uses are now invocable by any local MCP-aware client:
|
||||
|
||||
- **Claude Code** (this CLI). Add to `~/.claude.json` or per-project `.mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"openclaw-tray": {
|
||||
"type": "http",
|
||||
"url": "http://127.0.0.1:8765/"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The agent then sees `screen.snapshot`, `system.run`, `canvas.*`, etc. as tools, with whatever arguments the capability accepts.
|
||||
|
||||
- **Claude Desktop.** Same config shape under MCP servers.
|
||||
- **Cursor.** Same.
|
||||
- **GitHub Copilot CLI / Copilot in the terminal.** As MCP support lands in those clients, the endpoint is already there.
|
||||
- **Custom dev scripts.** Anything that can speak HTTP + JSON-RPC. A 30-line Python or Node helper can drive the entire capability surface.
|
||||
|
||||
In all cases the user gets a Windows-native agent experience without OpenClaw infrastructure. They can be entirely offline w.r.t. an OpenClaw gateway and still hand the LLM a working set of "do something on my Windows box" tools.
|
||||
|
||||
### Dev acceleration when building new features
|
||||
|
||||
This is the strongest argument for making MCP a first-class citizen, not an afterthought.
|
||||
|
||||
When a contributor adds a new capability — say, `clipboard.read`, `clipboard.write`, `windows.list`, `audio.transcribe`, `git.status`, `office.draft_email` — today the workflow looks like:
|
||||
|
||||
1. Implement `INodeCapability`.
|
||||
2. Wire it into `NodeService.RegisterCapabilities()`.
|
||||
3. Stand up a gateway, authenticate, pair the device, etc., to test.
|
||||
4. Drive the capability from within an agent conversation, observing logs and taking screenshots to confirm correctness.
|
||||
|
||||
With MCP in-process the workflow shortens to:
|
||||
|
||||
1. Implement `INodeCapability`.
|
||||
2. Wire it into `NodeService.RegisterCapabilities()`.
|
||||
3. Restart the tray. The new tool is *immediately* visible to any local MCP client (`tools/list` re-reads the registry every call), and to manual `curl` tests.
|
||||
|
||||
The dev loop for capabilities is now identical to the dev loop for any local HTTP server: edit, restart, hit the endpoint, observe. No gateway, no agent, no gateway auth — just the local bearer token.
|
||||
|
||||
This compounds when you stack it with Claude Code or Cursor on the same machine. A contributor can:
|
||||
|
||||
- Open the repo in their IDE.
|
||||
- Run the tray with `EnableMcpServer = true`.
|
||||
- Have Claude Code connected to the same MCP endpoint.
|
||||
- Iterate on a new capability while the agent — using that very capability — helps drive the iteration. The capability under development can be invoked by the assistant on the next turn after a tray restart. That's a tight self-hosted feedback loop.
|
||||
|
||||
It also reduces the cost of "speculative" capabilities. Today, adding a capability has a tax: it must be useful enough to justify the extra surface in the gateway/agent stack. With local MCP, a contributor can build a capability speculatively, validate it against their own MCP-aware agent, and only later decide whether to formalize it for gateway use. That lowers the bar for experimentation.
|
||||
|
||||
## Security model
|
||||
|
||||
The server is built on **multiple** defensive layers, not just one. Loopback alone is *not* sufficient — a browser tab the user opens is also on the loopback interface, so a malicious page could otherwise reach `http://127.0.0.1:8765/` directly.
|
||||
|
||||
1. **Loopback bind.** `HttpListener` is registered with the prefix `http://127.0.0.1:8765/`. The Windows kernel binds the listening socket to the loopback interface only — packets from other interfaces are not delivered to it. Firewall configuration is irrelevant. Defends against: another machine on the network.
|
||||
2. **Defensive `IsLoopback` check.** Each incoming request validates `ctx.Request.RemoteEndPoint.Address`. Belt-and-suspenders for #1.
|
||||
3. **CSRF / browser gate.** Each request is rejected if any of the following holds:
|
||||
- the request carries an `Origin` header (real MCP clients — Claude Desktop, Cursor, Claude Code, curl — never send `Origin`; browsers always do for cross-origin fetches);
|
||||
- the `Host` header is anything other than `127.0.0.1[:port]` or `localhost[:port]` (defends against DNS-rebinding pivots);
|
||||
- on `POST`, the `Content-Type` is anything other than `application/json` (forces a CORS preflight from a browser, which we never satisfy);
|
||||
- the request body exceeds 4 MiB (DoS / OOM cap).
|
||||
|
||||
Together the `Origin`, `Host`, and `Content-Type` checks force a malicious cross-origin browser fetch into a CORS preflight that we deliberately do not honor (no `Access-Control-Allow-*` is ever emitted), so the actual call is blocked before reaching capability code.
|
||||
4. **Concurrency cap.** A semaphore limits in-flight handlers to 8. A misbehaving local client cannot pin every threadpool thread on long-running screen/camera calls.
|
||||
5. **Capability-level controls remain in force.** `SystemCapability.SetApprovalPolicy(...)` (the exec approval policy) still gates `system.run`. Camera and screen capture still go through Windows consent flows. MCP doesn't bypass any of those.
|
||||
|
||||
**Authentication is a local bearer token, not a sandbox.** A local process with a TCP socket and the port number must also present the token described in [Authentication](#authentication) above before it can drive any capability. This does not change the trust boundary for code already running as the same user — such a process can read the token file, and could already invoke arbitrary Win32 APIs without going through MCP — so we don't try to stop same-user processes from talking to MCP. The token is there for the cases where that boundary matters (multi-user shared boxes, low-trust local processes). We chose bearer tokens issued by the tray (copy-to-clipboard from the Settings UI) over URL ACLs or HTTPS — both add deployment pain without solving the actual problem.
|
||||
|
||||
### Verifying the gate
|
||||
|
||||
These should all be **rejected** with `403 Forbidden`:
|
||||
|
||||
```powershell
|
||||
# Browser pretending to come from another origin
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Origin: https://evil.com" -H "Content-Type: application/json" -d '{}'
|
||||
|
||||
# DNS rebinding attempt
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Host: evil.com" -H "Content-Type: application/json" -d '{}'
|
||||
```
|
||||
|
||||
This should be **rejected** with `415`:
|
||||
|
||||
```powershell
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Content-Type: text/plain" --data '{"jsonrpc":"2.0","id":1,"method":"ping"}'
|
||||
```
|
||||
|
||||
These should **succeed**:
|
||||
|
||||
```powershell
|
||||
curl http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" # GET probe
|
||||
curl -X POST http://127.0.0.1:8765/ -H "Authorization: Bearer <token>" -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1,"method":"ping"}'
|
||||
```
|
||||
|
||||
## What's deliberately deferred
|
||||
|
||||
These are reasonable next steps but explicitly out of scope for the initial implementation:
|
||||
|
||||
1. **Per-tool input schemas.** Add an `IReadOnlyDictionary<string, JsonElement> InputSchemas` (or per-command descriptor) to `INodeCapability`. The MCP bridge's `HandleToolsList` picks them up automatically. Until then, MCP clients see permissive schemas and the agent has to figure out arg shapes from descriptions and trial-and-error.
|
||||
2. ~~**Authentication.**~~ Implemented. See [Authentication](#authentication) above.
|
||||
3. **Streamable HTTP / SSE.** For long-running tools (`screen.record`, future `audio.transcribe`), MCP supports streaming progress. The bridge needs to learn about it and the HTTP server needs to optionally upgrade.
|
||||
4. **Resource and prompt support.** MCP has `resources/*` and `prompts/*` methods we currently no-op. Notifications, recent activity, channel state could be modeled as MCP resources.
|
||||
5. **Configurable port.** Move `McpDefaultPort` into `SettingsManager`. Probably also pick a free port at startup if the default is in use, and surface the actual port in the Settings UI.
|
||||
6. **Setup Wizard step.** Today the Settings Advanced section is the only way to enable MCP. The Setup Wizard could offer it as a one-click option, especially attractive for users who don't run a gateway at all.
|
||||
|
||||
## File map
|
||||
|
||||
| File | Role |
|
||||
|---|---|
|
||||
| `src/OpenClaw.Shared/Mcp/McpToolBridge.cs` | Transport-agnostic JSON-RPC dispatcher. |
|
||||
| `src/OpenClaw.Shared/SettingsData.cs` | Settings JSON model. Adds `EnableMcpServer`; deprecates `McpOnlyMode`. |
|
||||
| `src/OpenClaw.Shared/Mcp/McpHttpServer.cs` | `HttpListener`-based loopback HTTP transport. |
|
||||
| `src/OpenClaw.Tray.WinUI/Services/NodeService.cs` | Owns the capability list. Hosts the MCP server when enabled. |
|
||||
| `src/OpenClaw.Tray.WinUI/Services/SettingsManager.cs` | In-memory settings model + load/save. Migrates legacy `McpOnlyMode`. |
|
||||
| `src/OpenClaw.Tray.WinUI/Pages/SettingsPage.xaml(.cs)` | Settings UI surface hosted by `HubWindow`. |
|
||||
| `src/OpenClaw.Tray.WinUI/App.xaml.cs` | Bootstraps `NodeService` based on the new mode matrix. |
|
||||
| `tests/OpenClaw.Shared.Tests/McpToolBridgeTests.cs` | 9 unit tests for the bridge. |
|
||||
|
||||
## Quick verification
|
||||
|
||||
With the tray running and `EnableMcpServer = true`:
|
||||
|
||||
```powershell
|
||||
# Server is up
|
||||
curl http://127.0.0.1:8765/ -H "Authorization: Bearer <token>"
|
||||
|
||||
# List tools
|
||||
curl -s -X POST http://127.0.0.1:8765/ `
|
||||
-H "Authorization: Bearer <token>" -H "Content-Type: application/json" `
|
||||
-d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}'
|
||||
|
||||
# Take a screenshot of the primary monitor
|
||||
curl -s -X POST http://127.0.0.1:8765/ `
|
||||
-H "Authorization: Bearer <token>" -H "Content-Type: application/json" `
|
||||
-d '{"jsonrpc":"2.0","id":2,"method":"tools/call","params":{"name":"screen.snapshot"}}'
|
||||
```
|
||||
|
||||
For Claude Code, drop this into `.mcp.json` at the repo root or `~/.claude.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"openclaw-tray": {
|
||||
"type": "http",
|
||||
"url": "http://127.0.0.1:8765/"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@ -1,554 +0,0 @@
|
||||
# Mission Control: Topology-Aware Command Center Plan
|
||||
|
||||
This plan turns the Windows tray from a "connected/not connected" companion into a Mission Control surface for any OpenClaw gateway topology. It is based on a deep audit of:
|
||||
|
||||
- Current Windows code in this repository on `mission-control-audit`.
|
||||
- Current upstream Mac app code in `openclaw/apps/macos/Sources/OpenClaw`.
|
||||
- Current upstream gateway, node policy, browser proxy, health, presence, usage, pairing, and discovery code in `openclaw/src` and `openclaw/extensions`.
|
||||
|
||||
The main product decision is deliberate: **do not make a native Windows gateway the center of gravity.** The Windows app should be a first-class node and command center for any OpenClaw gateway: Mac over SSH tunnel, WSL, Windows Node.js, LAN, Tailscale, or unknown/remote.
|
||||
|
||||
## 1. Goals
|
||||
|
||||
1. Make the Windows tray explain *what* it is connected to: local gateway, WSL gateway, Mac via SSH tunnel, Tailscale/LAN gateway, or unknown remote.
|
||||
2. Reach deeper Mac parity by porting the valuable Mac "mission control" ideas, not just matching command names.
|
||||
3. Keep OpenClaw open and topology-neutral: the tray should observe, classify, diagnose, and repair; it should not force one gateway hosting model.
|
||||
4. Prioritize privacy and safety. Diagnostics must not trigger camera, screen recording, microphone, or broad command execution.
|
||||
5. Make every repair action copyable, explainable, and topology-aware.
|
||||
|
||||
## 2. Audit findings
|
||||
|
||||
### 2.1 Windows current state
|
||||
|
||||
Windows now has a strong foundation:
|
||||
|
||||
- Node Mode with canvas, camera, screen snapshot/record, location, device info/status, system commands, notifications, and exec approval policy.
|
||||
- Command Center status detail window with channels, sessions, usage, local/operator node inventory, allowlist diagnostics, pairing warnings, and activity stream.
|
||||
- SSH tunnel settings and service.
|
||||
- Activity Stream and support-bundle copy path that avoid storing invoke payloads.
|
||||
- Deep links including `openclaw://commandcenter`.
|
||||
|
||||
The biggest missing model is not another gateway implementation. It is **topology state**. Current settings collapse all topologies into:
|
||||
|
||||
- `GatewayUrl`
|
||||
- `UseSshTunnel`
|
||||
- SSH host/user/ports
|
||||
- `EnableNodeMode`
|
||||
|
||||
There is no first-class concept of "Mac over SSH", "WSL", "Windows native", "Tailscale", "LAN", or "unknown".
|
||||
|
||||
### 2.2 Mac Mission Control behaviors worth porting
|
||||
|
||||
The Mac app is not just a menu bar icon. It is a gateway/node/control-plane cockpit.
|
||||
|
||||
Important Mac surfaces:
|
||||
|
||||
- Status icon with activity badge and gateway error dot.
|
||||
- Hover HUD with current status and last tool/activity.
|
||||
- WebChat, Canvas, Settings, Onboarding, Agent Events, Notify Overlay, Voice/Talk overlays.
|
||||
- Menu sections for sessions, usage, cost, nodes, gateway discovery, channel state, browser control, camera/canvas/voice toggles, exec approvals, debug actions, and update status.
|
||||
- Per-session submenus with preview, thinking/verbose settings, reset, compact, delete, and log opening.
|
||||
- Per-node submenus with copy actions for node ID, name, IP, platform, versions, caps, and commands.
|
||||
- Channel settings driven by gateway schemas and channel health/probe details.
|
||||
- Debug/diagnostic actions: health check, test heartbeat, open logs, open config, open session store, restart gateway, reset SSH tunnel, port diagnostics, kill process by PID, rolling JSONL diagnostics, and verbose logging.
|
||||
|
||||
Important Mac gateway lifecycle pieces:
|
||||
|
||||
- `GatewayProcessManager` state machine: stopped, starting, running, attachedExisting, failed.
|
||||
- Attach-existing path before spawning a gateway.
|
||||
- `GatewayEnvironment`: Node runtime, OpenClaw CLI location/version, port/bind resolution.
|
||||
- `PortGuardian`: identifies listeners on gateway ports, classifies expected vs unexpected processes, and can kill with confirmation.
|
||||
- `GatewayEndpointStore`: async-stream state for local/remote/unconfigured endpoint readiness.
|
||||
- Gateway discovery via Bonjour/SRV plus Tailscale selection rules.
|
||||
- Remote SSH tunnel actor with robust SSH options, fast-fail check, random local port fallback, and tunnel reuse across app restarts.
|
||||
- Control channel with friendly error mapping and recovery scheduling.
|
||||
- Presence reporter every 180 seconds with host/IP/mode/version/platform/device fields.
|
||||
|
||||
Important Mac security/privacy pieces:
|
||||
|
||||
- Permission matrix for notifications, automation, accessibility, screen recording, microphone, speech, camera, and location.
|
||||
- Onboarding security banner warning that agents can run commands, read/write files, and capture screenshots.
|
||||
- Exec approval UX with Deny / Allow Once / Allow Always.
|
||||
- Command display sanitizer for control chars, invisible characters, and non-ASCII spaces to prevent spoofing.
|
||||
- Glob allowlist matcher semantics.
|
||||
- Host environment sanitizer with large inherited secret/toolchain blocklist, PATH override rejection, and shell-wrapper allowlist.
|
||||
- Exec approval edits with base-hash optimistic concurrency: **implemented for `system.execApprovals.get/set`; stale remote writes are rejected.**
|
||||
- Pairing prompt with name, node ID, platform, app, IP, and approve/reject/later actions.
|
||||
|
||||
### 2.3 Gateway and browser proxy findings
|
||||
|
||||
`browser.proxy` is the main concrete remaining Mac node command gap.
|
||||
|
||||
Gateway/browser facts:
|
||||
|
||||
- `browser.proxy` is a canonical node command and included in Windows platform defaults at the gateway policy level.
|
||||
- Gateway policy still requires both gates:
|
||||
- command allowed by platform defaults or `gateway.nodes.allowCommands`
|
||||
- command declared by the node
|
||||
- The browser plugin/node-host contract is:
|
||||
- input: `method`, `path`, optional `query`, `body`, `timeoutMs`, `profile`
|
||||
- default timeout: 20 seconds
|
||||
- output: `{ result, files? }`
|
||||
- files are base64 payloads with path/mime metadata
|
||||
- Persistent profile mutations are blocked at gateway and node-host levels.
|
||||
- Mac implements `browser.proxy` only for local mode, proxying to `127.0.0.1:{gatewayPort+2}` with Bearer or `x-openclaw-password` auth, and a 10 MB/file extraction cap.
|
||||
- Windows managed SSH tunnel mode now forwards both the gateway port and the browser-control companion port (`local+2` to `remote+2`) when the browser proxy capability is enabled, so Mac-over-SSH topologies can satisfy the same local-only browser proxy contract.
|
||||
|
||||
Gateway APIs and signals worth surfacing:
|
||||
|
||||
- `hello-ok` snapshot/policy fields, including tick interval and limits.
|
||||
- `health`, `presence`, `tick`, `status`, `system-presence`, `sessions.*`, `usage.status`, `usage.cost`, `sessions.usage*`, `node.list`, `node.describe`, pairing APIs, and config/wizard APIs.
|
||||
- Snapshot fields such as presence, health, stateVersion, uptimeMs, auth/session defaults.
|
||||
- Non-loopback gateway security expectations: use `wss`, auth/trusted proxy, and explicit Control UI origins.
|
||||
- Discovery signals: mDNS/SRV, wide-area DNS-SD, Tailscale modes.
|
||||
|
||||
## 3. Topology model
|
||||
|
||||
### 3.1 Gateway kinds
|
||||
|
||||
Initial enum:
|
||||
|
||||
| Kind | Meaning | Detection signals |
|
||||
|---|---|---|
|
||||
| `MacOverSsh` | Localhost URL backed by an SSH tunnel to a Mac/remote host | `UseSshTunnel=true`, localhost gateway URL, SSH host present; future: presence platform macOS |
|
||||
| `Wsl` | Gateway likely running in WSL2 | localhost URL without tunnel, `wsl.exe` available, port/listener/process hints indicate WSL |
|
||||
| `WindowsNative` | Gateway likely running directly on Windows | localhost URL without tunnel and no WSL evidence |
|
||||
| `Tailscale` | Gateway reached via Tailscale DNS/IP | host ends with `.ts.net` or IP is in 100.64.0.0/10 |
|
||||
| `RemoteLan` | Gateway reached via LAN/mDNS/private host | RFC1918 IP, `.local`, or non-loopback private hostname |
|
||||
| `Remote` | Public/unknown non-local remote gateway | non-loopback public host |
|
||||
| `Unknown` | Cannot classify | invalid/missing URL or conflicting settings |
|
||||
|
||||
### 3.2 State objects
|
||||
|
||||
Additive shared models:
|
||||
|
||||
```csharp
|
||||
public enum GatewayKind
|
||||
{
|
||||
Unknown,
|
||||
WindowsNative,
|
||||
Wsl,
|
||||
MacOverSsh,
|
||||
Tailscale,
|
||||
RemoteLan,
|
||||
Remote
|
||||
}
|
||||
|
||||
public enum TunnelStatus
|
||||
{
|
||||
NotConfigured,
|
||||
Stopped,
|
||||
Starting,
|
||||
Up,
|
||||
Restarting,
|
||||
Failed
|
||||
}
|
||||
|
||||
public sealed class GatewayTopologyInfo
|
||||
{
|
||||
public GatewayKind DetectedKind { get; set; }
|
||||
public string DisplayName { get; set; }
|
||||
public string GatewayUrl { get; set; }
|
||||
public string Host { get; set; }
|
||||
public bool UsesSshTunnel { get; set; }
|
||||
public string Transport { get; set; }
|
||||
public string Detail { get; set; }
|
||||
}
|
||||
|
||||
public sealed class TunnelCommandCenterInfo
|
||||
{
|
||||
public TunnelStatus Status { get; set; }
|
||||
public string LocalEndpoint { get; set; }
|
||||
public string RemoteEndpoint { get; set; }
|
||||
public string? Host { get; set; }
|
||||
public string? User { get; set; }
|
||||
public string? LastError { get; set; }
|
||||
public DateTime? StartedAt { get; set; }
|
||||
}
|
||||
```
|
||||
|
||||
Extend `GatewayCommandCenterState` with:
|
||||
|
||||
```csharp
|
||||
public GatewayTopologyInfo Topology { get; set; } = new();
|
||||
public TunnelCommandCenterInfo? Tunnel { get; set; }
|
||||
```
|
||||
|
||||
### 3.3 Classifier rules
|
||||
|
||||
Phase 1 classifier should be pure and unit-testable:
|
||||
|
||||
1. If `UseSshTunnel` is true and SSH host is set:
|
||||
- if gateway URL host is localhost/127.0.0.1/::1, classify `MacOverSsh` for now.
|
||||
- if SSH host ends with `.ts.net`, include "over Tailscale SSH" in detail but keep tunnel as the primary transport.
|
||||
2. Else if gateway URL host is localhost/127.0.0.1/::1:
|
||||
- classify `WindowsNative` initially.
|
||||
- a later WSL probe can refine to `Wsl`.
|
||||
3. Else if host ends with `.ts.net` or its IP is in 100.64.0.0/10:
|
||||
- classify `Tailscale`.
|
||||
4. Else if host is RFC1918, `.local`, or common private names:
|
||||
- classify `RemoteLan`.
|
||||
5. Else if host is non-empty:
|
||||
- classify `Remote`.
|
||||
6. Else:
|
||||
- classify `Unknown`.
|
||||
|
||||
Phase 2 WSL refinement:
|
||||
|
||||
- Probe `wsl.exe -l -q` with a short timeout.
|
||||
- Optional port/process detection should be cached and never block UI.
|
||||
- If localhost gateway is connected and WSL evidence is strong, classify `Wsl`.
|
||||
|
||||
## 4. Command Center UX target
|
||||
|
||||
### 4.1 Gateway/topology header card
|
||||
|
||||
Add a top card under the current status header:
|
||||
|
||||
- "Gateway: Windows native / Mac over SSH / WSL / Tailscale / LAN / Remote / Unknown"
|
||||
- URL host and transport: `ws`, `wss`, `ssh tunnel`, `tailnet`, `lan`
|
||||
- tunnel state if configured: `Up`, `Restarting`, `Failed`, `Stopped`
|
||||
- last health timestamp and gateway version/uptime once available from protocol
|
||||
|
||||
### 4.2 Diagnostics categories
|
||||
|
||||
Add categories beyond current node/channel/allowlist/parity:
|
||||
|
||||
| Category | Examples | Repair action |
|
||||
|---|---|---|
|
||||
| `topology` | Localhost URL but no local/tunnel evidence; remote plaintext `ws://`; unknown public host | Explain expected topology; copy URL/settings hints |
|
||||
| `tunnel` | SSH tunnel stopped/restarting/failed | Copy `ssh -N -L ...` command; "Reset tunnel" later |
|
||||
| `wsl` | Localhost likely backed by WSL; NAT or distro reboot may break it | Show WSL-specific diagnostic hints |
|
||||
| `tailscale` | Tailnet host reached without a tunnel, or a direct-connection auth mismatch | Show Tailscale/wss/auth hints |
|
||||
| `browser` | `browser.proxy` disabled, policy-filtered, or missing a gateway+2 browser-control host | Explain Settings, allowlist, SSH forward, or local browser-host repair path |
|
||||
| `gateway` | stale health/stateVersion, auth error, not connected | Existing patterns plus topology-specific detail |
|
||||
|
||||
### 4.3 Tray menu badge
|
||||
|
||||
Add a small topology badge next to status:
|
||||
|
||||
- "Gateway: Connected - Mac over SSH"
|
||||
- "Gateway: Connected - Windows native"
|
||||
- "Gateway: Connected - Tailscale"
|
||||
|
||||
### 4.4 Settings hint
|
||||
|
||||
In Settings, show read-only detected topology near gateway URL/tunnel settings: **implemented with a live summary under the topology guide**
|
||||
|
||||
- detected kind
|
||||
- whether settings imply tunnel/direct
|
||||
- warning if URL/tunnel conflict
|
||||
|
||||
### 4.5 Future Mission Control pages
|
||||
|
||||
Keep `HubWindow` as the Command Center host, with pages/sections for:
|
||||
|
||||
1. Overview
|
||||
2. Gateway topology
|
||||
3. Tunnel/transport
|
||||
4. Channels
|
||||
5. Sessions
|
||||
6. Nodes/capabilities
|
||||
7. Command policy/allowlist
|
||||
8. Pairing/devices
|
||||
9. Activity/events
|
||||
10. Permissions/privacy
|
||||
11. Logs/debug/repair
|
||||
|
||||
## 5. Mac parity matrix
|
||||
|
||||
### 5.1 Node command surface
|
||||
|
||||
| Command area | Mac status | Windows status | Priority |
|
||||
|---|---|---|---|
|
||||
| Canvas core | Present | Mostly present | Verify defaults, payload names, A2UI bridge, snapshot shape |
|
||||
| Screen snapshot | Present | Present | Verify defaults: max width, format, quality, metadata |
|
||||
| Screen record | Present | Present | Verify clamps/audio fields; do not live-test without permission |
|
||||
| Camera list/snap/clip | Present | Present | Verify facing/deviceId/delay/default quality |
|
||||
| Location | Present | Present | Align error tokens and permission mode |
|
||||
| Device info/status | Present | Present | Done; keep payload shape tests |
|
||||
| System notify | Present | Present | Add overlay/priority parity later |
|
||||
| System run/which | Present | Present | Verify push event names and approval reasons |
|
||||
| Exec approvals get/set | Present | Present | Base-hash optimistic concurrency implemented |
|
||||
| Browser proxy | Present, local-only | Local bridge present; live smoke blocked until browser-control host listens on gateway+2 | Continue host setup/live-smoke guidance |
|
||||
|
||||
### 5.2 Mission Control surfaces
|
||||
|
||||
| Mac capability | Windows today | Plan |
|
||||
|---|---|---|
|
||||
| Gateway process state | Implemented for detected/managed runtimes | Command Center shows topology, gateway listener process/PID, and managed/detected SSH context; process manager remains only for a future owned local Windows gateway |
|
||||
| Endpoint store/discovery | Implemented first slice | Settings topology presets and detected topology summaries classify local, SSH, WSL, and remote gateway shapes |
|
||||
| SSH tunnel robust state | Implemented | Managed SSH tunnel status/error/runtime details surface in Settings, Command Center, support context, and restart actions |
|
||||
| PortGuardian | Partial | Read-only port diagnostics identify local listeners and owning process/PID; destructive kill actions remain intentionally absent |
|
||||
| HealthStore derived states | Implemented first slice | Command Center warnings include topology-aware gateway, tunnel, browser-control, channel, usage, and node health |
|
||||
| Nodes submenu copy actions | Implemented | Per-node copy and full node inventory copy include command groups, filtered commands, disabled settings, and parity gaps |
|
||||
| Session previews/settings | Implemented | Tray session rows include previews plus thinking/verbose, reset, compact, and delete actions |
|
||||
| Cost 30-day chart | Implemented | Command Center renders 30-day cost bars from `usage.cost` daily totals |
|
||||
| Agent events ring | Implemented | Activity Stream keeps a 400-event rich ring and support bundle window |
|
||||
| Permissions matrix | Implemented first slice | Command Center shows safe Windows privacy settings deep links without probing devices |
|
||||
| Onboarding security banner | Implemented | Setup Wizard warns about agent control of enabled local command/screen/camera/location/browser/canvas surfaces |
|
||||
| Debug actions | Implemented | Tray, Command Center, deep links, and PowerToys expose logs/config/diagnostics, health/update actions, managed SSH restart, support context, debug bundle, browser setup, and copyable diagnostics/summaries |
|
||||
| Voice/Talk | Missing | Separate roadmap track |
|
||||
| Cron/Skills settings | Missing/limited | Separate roadmap track |
|
||||
|
||||
## 6. Browser proxy feasibility
|
||||
|
||||
### 6.1 What it is
|
||||
|
||||
`browser.proxy` is not a generic HTTP proxy. It is a node command that forwards browser-plugin requests through a node-host endpoint and returns structured results and optional extracted files.
|
||||
|
||||
### 6.2 Windows options
|
||||
|
||||
1. **Local gateway/browser-host proxy parity**
|
||||
- Implement only when gateway is local or tunnel-local.
|
||||
- Proxy to `127.0.0.1:{gatewayPort+2}` like Mac.
|
||||
- Use Bearer/token or password header as gateway expects.
|
||||
- Enforce same method/path/query/body/timeout/profile contract.
|
||||
- Enforce same persistent-profile mutation block and file-size cap.
|
||||
- Best Mac parity, but depends on browser plugin host availability on Windows.
|
||||
|
||||
2. **Edge/WebView2 DevTools bridge**
|
||||
- Use WebView2/Edge DevTools protocol from the tray.
|
||||
- More Windows-native, but diverges from gateway browser extension contract.
|
||||
- Riskier and likely not the immediate parity path.
|
||||
|
||||
3. **Do not implement in tray; require browser extension node-host**
|
||||
- Keep tray focused on desktop node and command center.
|
||||
- Command Center explains why `browser.proxy` is absent and how to install/enable the browser plugin.
|
||||
- Lowest risk, but leaves a Mac command gap.
|
||||
|
||||
Recommended: investigate option 1 first, with `browser.proxy` gated to local/tunnel topologies and disabled for remote public gateways unless the upstream browser host contract says otherwise.
|
||||
|
||||
Current Windows implementation status: Windows node now advertises `browser.proxy` and forwards it to the local browser control host at `127.0.0.1:{gateway port + 2}`. It uses the gateway bearer token first and retries with the same shared secret as browser-host password/basic auth if bearer auth is rejected. Managed SSH tunnel mode also forwards the companion browser-control port (`local gateway port + 2` to `remote gateway port + 2`) when the browser proxy capability is enabled. Command Center still performs the read-only feasibility probe and warns when no compatible local browser host is listening, because the command depends on that local service being available.
|
||||
|
||||
## 7. Security and privacy requirements
|
||||
|
||||
1. Diagnostics must never take screenshots, record screen, capture camera, start microphone, or run arbitrary commands.
|
||||
2. Support bundles must not include base64 payloads, tokens, screenshots, recordings, camera data, or command arguments.
|
||||
3. Browser proxy must be local-only until we prove remote behavior is safe and intended.
|
||||
4. Exec approval UI must include command display sanitization before adding "Allow Once/Always" UX.
|
||||
5. Environment override parity should reject PATH and dangerous inherited/override keys.
|
||||
6. Pairing approvals must show identity, platform, app, IP, and repair status before approval.
|
||||
7. Allowlist repair should distinguish safe commands from privacy-sensitive commands. This is already in the Windows Command Center and should remain a product rule.
|
||||
|
||||
## 8. Implementation phases
|
||||
|
||||
### Phase 1: Topology model and gateway card
|
||||
|
||||
Files:
|
||||
|
||||
- `src/OpenClaw.Shared/Models.cs`
|
||||
- `src/OpenClaw.Shared/SettingsData.cs` if optional declared kind is persisted
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Services/SshTunnelService.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Windows/HubWindow.xaml`
|
||||
- `src/OpenClaw.Tray.WinUI/Windows/HubWindow.xaml.cs`
|
||||
- `tests/OpenClaw.Shared.Tests/ModelsTests.cs`
|
||||
- `tests/OpenClaw.Tray.Tests/SettingsRoundTripTests.cs` if settings change
|
||||
|
||||
Deliverables:
|
||||
|
||||
- `GatewayKind`, `TunnelStatus`, `GatewayTopologyInfo`, `TunnelCommandCenterInfo`.
|
||||
- Pure topology classifier.
|
||||
- Tunnel state/error/startedAt from `SshTunnelService`.
|
||||
- Gateway card in Command Center.
|
||||
- Topology/tunnel warnings.
|
||||
|
||||
Risk: low. No protocol changes.
|
||||
|
||||
### Phase 2: Better tunnel and WSL diagnostics
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Mac-equivalent SSH options: **implemented for tunnel startup**
|
||||
- `BatchMode=yes`
|
||||
- `ExitOnForwardFailure=yes`
|
||||
- `ServerAliveInterval=15`
|
||||
- `ServerAliveCountMax=3`
|
||||
- `TCPKeepAlive=yes`
|
||||
- Explicit tunnel states (`NotConfigured`, `Stopped`, `Starting`, `Up`, `Restarting`, `Failed`): **implemented**
|
||||
- Fast-fail detection.
|
||||
- Optional random local port fallback.
|
||||
- WSL detection helper with timeout/cache. Explicit `wsl.localhost` / `.wsl` host classification is implemented.
|
||||
- Tunnel reset action.
|
||||
|
||||
Risk: medium. Process lifecycle and port behavior need careful tests.
|
||||
|
||||
### Phase 3: Gateway self and presence model
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Parse `hello-ok` snapshot/version/policy fields: **implemented**
|
||||
- Parse/preserve presence events.
|
||||
- Show gateway version, uptime/stateVersion, auth source, presence count: **implemented in Command Center gateway card**
|
||||
- Add node/presence freshness warnings.
|
||||
|
||||
Risk: low-medium; mostly parsing and UI.
|
||||
|
||||
### Phase 4: Mac-like diagnostics and repair
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Debug/Mission Control actions:
|
||||
- open log: **implemented as Open Logs folder**
|
||||
- open config folder: **implemented**
|
||||
- open session store
|
||||
- run health now: **implemented as Refresh Health**
|
||||
- send test heartbeat
|
||||
- reset managed SSH tunnel: **implemented as Restart SSH Tunnel when Settings owns the tunnel**
|
||||
- restart local gateway if topology is WindowsNative and managed
|
||||
- copy privacy-safe support context: **implemented**
|
||||
- Rolling diagnostics JSONL with rotation: **implemented for privacy-safe app/connection/gateway/tunnel metadata**
|
||||
- Port diagnostics table: **read-only local listener visibility implemented, including owning PID/process name when Windows exposes it**
|
||||
- Manual SSH tunnel detection: **implemented Command Center classification for loopback gateway ports owned by `ssh`, so hand-started local forwards are not mislabeled as native Windows gateways**
|
||||
- Gateway runtime owner summary: **implemented in Command Center topology/support context so local gateway or SSH-forward listener process name, PID, and port are visible without managing the process**
|
||||
- Browser proxy SSH forward warning: **implemented targeted Command Center guidance when an SSH tunnel gateway is up but the companion `gateway port + 2` browser-control forward is missing**
|
||||
- Browser proxy invoke error guidance: **implemented `browser.proxy` unreachable/timeout errors that name `127.0.0.1:{gateway+2}` and show the exact SSH local-forward shape**
|
||||
- Settings SSH browser-forward guidance: **implemented Settings copy explaining that the managed SSH tunnel forwards `local-port+2` to `remote-port+2` for `browser.proxy` when the browser proxy bridge is enabled**
|
||||
- Settings SSH test tunnel parity: **implemented temporary Settings test tunnels with the same optional browser-control `local+2` forward that the runtime uses when Browser proxy bridge is enabled**
|
||||
- Settings SSH tunnel preview: **implemented selectable Settings preview of the exact managed `ssh -N -L ...` command, including the optional browser-control companion forward**
|
||||
- Browser proxy disabled guidance: **implemented a specific Command Center warning/copy hint when `browser.proxy` is intentionally disabled in Settings**
|
||||
- Asymmetric SSH browser guidance: **fixed Command Center and `browser.proxy` invoke guidance so local `gateway+2` and remote `gateway+2` can differ**
|
||||
- SSH local browser-port source: **fixed Command Center browser diagnostics to derive the local browser-control port from the active tunnel local endpoint instead of stale saved gateway URLs**
|
||||
- Browser-control host runtime smoke: **verified the upstream browser-control host can listen locally on `127.0.0.1:{gateway+2}`, return HTTP 200 from `/` and `/tabs`, and appear in Command Center port diagnostics with owning PID/process**
|
||||
- Browser proxy auth guidance: **implemented warnings for QR/bootstrap-paired Windows nodes that advertise `browser.proxy` without a saved gateway shared token, and clarified invoke errors for missing versus mismatched browser-control auth**
|
||||
|
||||
Risk: medium-high for kill/restart actions; start as read-only/copy actions.
|
||||
|
||||
### Phase 5: Node command byte-for-byte parity audit fixes
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Verify and align canvas/screen/camera/location/system payload defaults and error tokens.
|
||||
- Verify push event names for exec.
|
||||
- Add missing base-hash concurrency semantics if needed: **implemented for remote exec approval policy edits**
|
||||
- Add `browser.proxy` feasibility prototype or explicit "not implemented" install guidance: **local browser-control bridge implemented; host runtime and Command Center listener detection smoke-tested; remaining end-to-end invoke blocker is matching operator/gateway auth for the active gateway**
|
||||
|
||||
Risk: varies; `browser.proxy` is medium-high.
|
||||
|
||||
### Phase 6: Security/privacy UX parity
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Windows permission matrix with deep links:
|
||||
- camera
|
||||
- microphone
|
||||
- location
|
||||
- notifications
|
||||
- broad file system access if relevant
|
||||
- screen capture/graphics capture guidance
|
||||
- First read-only Command Center slice is implemented. It surfaces these settings pages and explanatory rows, but intentionally does not query, request, or exercise device permissions.
|
||||
- Capability diagnostics copy is implemented for declared commands, gateway allowlist status, and privacy-sensitive opt-ins.
|
||||
- Mac-style onboarding security warning: **implemented in Setup Wizard Node Mode step, warning users that approved agents can run local commands and access enabled screen/camera/location/browser/canvas surfaces**
|
||||
- Topology choice onboarding: **first Settings guide implemented with local, WSL, SSH tunnel, and remote/Tailscale presets**
|
||||
- Exec approval dialog with sanitizer and three-button flow: **implemented for local `Prompt` policy decisions with Allow once / Always allow / Deny**
|
||||
- Exec approval remote-policy hardening: **implemented guardrails so `system.execApprovals.set` cannot remotely switch to default allow, install broad/dangerous allow rules, or overwrite a newer local policy without a matching `baseHash`**
|
||||
- Host env sanitizer parity hardening: **implemented expanded blocking for secret-looking overrides such as tokens, passwords, API keys, access keys, private keys, client secrets, and connection strings**
|
||||
- Dangerous command opt-in guidance: **implemented copyable safety guidance for camera/screen privacy-sensitive commands without emitting one-click dangerous repair commands**
|
||||
- Node capability settings: **implemented Settings toggles for canvas, screen, camera, location, and browser proxy command groups so privacy-sensitive surfaces can be disabled before reconnecting/re-pairing**
|
||||
- Disabled capability diagnostics: **implemented Command Center distinction between intentionally disabled Settings groups and true gateway allowlist/parity gaps**
|
||||
- Browser proxy policy diagnostics: **implemented a specific Command Center warning/copy action for declared `browser.proxy` commands filtered by gateway policy, instead of burying them under generic blocked-command output**
|
||||
|
||||
Risk: high for exec/security. Do not rush.
|
||||
|
||||
### Phase 7: Mission Control depth
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Session previews with thinking/verbose controls.
|
||||
- Cost 30-day bars: **implemented in Command Center from `usage.cost` daily totals**
|
||||
- Node copy submenus / summaries: **implemented first Command Center copy action**
|
||||
- Channel health summary and copyable context: **implemented first summary plus Command Center start/stop actions**
|
||||
- Channel schema forms and QR login flows: **implemented first Windows surface with channel setup/dashboard deep links and copyable channel context**
|
||||
- Skills/Cron settings: **implemented first Windows surface with Command Center dashboard entrypoints and copyable guidance**
|
||||
- Agent events ring expansion: **implemented first Command Center recent-activity panel with copy/open-stream actions**
|
||||
- Hover HUD / richer tray tooltip: **implemented with topology, channel, node, warning, and activity summary**
|
||||
- Update status: **implemented in Command Center support/debug section and copied support context, including current version, latest prompted version when known, and last check outcome**
|
||||
|
||||
Risk: medium; mostly UI and gateway method plumbing.
|
||||
|
||||
### Phase 8: Optional local Windows gateway convenience
|
||||
|
||||
This is optional and should not block Mission Control.
|
||||
|
||||
Deliverables:
|
||||
|
||||
- Detect existing local Windows gateway.
|
||||
- Attach to it and show logs/version/port.
|
||||
- Only if user opts in: start/stop/restart a managed local gateway.
|
||||
|
||||
Risk: high. Requires Node/runtime/version/process ownership. Keep separate from topology-aware Command Center.
|
||||
|
||||
## 9. Test strategy
|
||||
|
||||
### Unit tests
|
||||
|
||||
- Topology classifier matrix:
|
||||
- localhost/no tunnel -> WindowsNative
|
||||
- localhost/tunnel -> MacOverSsh
|
||||
- `.ts.net` -> Tailscale
|
||||
- 100.64.0.0/10 -> Tailscale
|
||||
- RFC1918 private ranges (10.0.0.0/8, 172.16.0.0/12 including the 172.16.x and 172.31.x boundaries, 192.168.0.0/16) -> RemoteLan
|
||||
- `.local` -> RemoteLan
|
||||
- public host -> Remote
|
||||
- invalid/missing -> Unknown
|
||||
- Tunnel info state mapping.
|
||||
- Diagnostic sorting/dedupe with topology/tunnel warnings.
|
||||
- Settings round-trip if new persisted fields are added.
|
||||
- Existing capability and command-center tests stay green.
|
||||
|
||||
### Safe live tests
|
||||
|
||||
No screen recording, camera capture, or microphone.
|
||||
|
||||
1. Mac gateway over SSH tunnel:
|
||||
- Enable tunnel.
|
||||
- Expect Command Center topology: Mac over SSH.
|
||||
- Expect tunnel state: Up.
|
||||
- Health/channel events continue.
|
||||
2. Localhost without tunnel:
|
||||
- Expect Windows native until WSL detection exists.
|
||||
- If no gateway, show clear connection warning.
|
||||
3. Tailscale URL:
|
||||
- Use a synthetic settings profile or non-invasive connection check.
|
||||
- Expect topology classification only.
|
||||
4. Remote LAN URL:
|
||||
- Expect Remote LAN classification.
|
||||
5. Tunnel failure:
|
||||
- Stop only the known SSH process if started by the app.
|
||||
- Expect tunnel warning/restart state.
|
||||
6. Allowlist regression:
|
||||
- Safe repair remains copyable.
|
||||
- Dangerous camera/screen commands remain informational.
|
||||
|
||||
### Required validation
|
||||
|
||||
After code changes:
|
||||
|
||||
```powershell
|
||||
.\build.ps1
|
||||
dotnet test .\tests\OpenClaw.Shared.Tests\OpenClaw.Shared.Tests.csproj --no-restore
|
||||
dotnet test .\tests\OpenClaw.Tray.Tests\OpenClaw.Tray.Tests.csproj --no-restore
|
||||
```
|
||||
|
||||
## 10. Open questions
|
||||
|
||||
1. Should `DeclaredGatewayKind` be a persisted user hint, or should detection remain purely derived?
|
||||
2. Should Mac-over-SSH be named `SshTunnel` until presence confirms a Mac platform?
|
||||
3. Should `browser.proxy` live in the tray, or should Command Center guide users to install/enable the browser plugin host?
|
||||
4. Do we want a future "managed local gateway" mode, or only "detected local gateway"?
|
||||
5. How much Tailscale integration should Windows own vs merely detect?
|
||||
6. Should WSL detection use process/port probing, `wsl.exe`, or gateway presence fields once available?
|
||||
7. Should support bundles include topology/tunnel diagnostics by default, and how should they redact host/user/IP? **Implemented for Command Center copy support context with redacted gateway URL, topology detail, tunnel endpoints/errors, and port details.**
|
||||
|
||||
## 11. Immediate recommendation
|
||||
|
||||
Implement Phase 1 now:
|
||||
|
||||
- Add topology/tunnel models and classifier.
|
||||
- Surface them in Command Center.
|
||||
- Add topology/tunnel warnings.
|
||||
- Keep everything read-only and diagnostic.
|
||||
|
||||
This is the cleanest bridge between today's working Command Center and the Mac-style Mission Control product vision. It does not require a native Windows gateway, protocol changes, or privacy-sensitive live tests.
|
||||
|
||||
@ -1,105 +0,0 @@
|
||||
# Onboarding Wizard
|
||||
|
||||
The onboarding wizard is a guided 6-screen setup experience for new Windows users, matching the macOS onboarding flow.
|
||||
|
||||
## Overview
|
||||
|
||||
On first launch (or when no gateway token is configured), the wizard walks users through:
|
||||
|
||||
1. **Welcome** — Greeting and introduction
|
||||
2. **Connection** — Gateway selection and authentication
|
||||
3. **Wizard** — Gateway-driven configuration (AI provider, personality, channels)
|
||||
4. **Permissions** — Windows system permission review
|
||||
5. **Chat** — First conversation with the agent
|
||||
6. **Ready** — Feature summary and completion
|
||||
|
||||
The wizard adapts based on the connection mode:
|
||||
- **Local gateway**: All 6 screens (including Wizard for gateway configuration)
|
||||
- **Remote gateway**: Skips Wizard (assumes gateway is pre-configured)
|
||||
- **Configure Later**: Minimal flow — Welcome → Connection → Ready
|
||||
|
||||
## Screen Details
|
||||
|
||||
### Welcome
|
||||
Displays the OpenClaw lobster icon, app title, and a brief description. Single "Get Started" button advances to Connection.
|
||||
|
||||
### Connection
|
||||
Three connection modes via radio buttons:
|
||||
- **Local** — Pre-fills `ws://localhost:18789` for a gateway running on the same machine or in WSL
|
||||
- **Remote** — Enter a gateway URL and bootstrap token, or paste a base64url-encoded setup code
|
||||
- **Later** — Skip connection for now; configure from the tray menu after setup
|
||||
|
||||
Connection testing performs a real WebSocket handshake with Ed25519 device authentication. Status feedback shows connecting, connected, pairing required, token mismatch, or timeout.
|
||||
|
||||
When pairing approval is required, the wizard displays the gateway CLI approval command, copies it to the clipboard, and shows a notification with a copy action. Approval still happens through the gateway's normal `openclaw devices approve <device-id>` flow; the Windows tray does not edit gateway pairing state directly.
|
||||
|
||||
### Wizard
|
||||
Renders server-defined setup steps via RPC (`wizard.start` / `wizard.next`). The gateway controls the flow — steps can be:
|
||||
- **Note** — informational messages
|
||||
- **Confirm** — yes/no decisions
|
||||
- **Text** — free-form input (with PasswordBox for sensitive fields like API keys)
|
||||
- **Select** — radio button choices (e.g., AI provider selection)
|
||||
- **Progress** — loading indicator for background operations
|
||||
|
||||
If the gateway doesn't support the wizard protocol or is unreachable, this screen shows an "offline" message and can be skipped.
|
||||
|
||||
### Permissions
|
||||
Checks 5 Windows permissions using native APIs and registry:
|
||||
- Notifications (Toast capability)
|
||||
- Camera (Windows.Devices.Enumeration)
|
||||
- Microphone (Windows.Devices.Enumeration)
|
||||
- Screen Capture (Graphics.Capture)
|
||||
- Location (optional, registry-based)
|
||||
|
||||
Each permission shows its current status (Enabled/Disabled/Allowed/Denied) with an "Open Settings" button linking to the relevant `ms-settings:` URI.
|
||||
|
||||
### Chat
|
||||
Embeds the gateway's web chat UI via WebView2, matching the post-setup `ChatWindow` for visual consistency. Uses the shared `GatewayChatHelper` for URL building and WebView2 initialization.
|
||||
|
||||
On first load, a bootstrap message is auto-injected to kick off the gateway's first-run ritual (BOOTSTRAP.md). The message is safely encoded using `JsonSerializer.Serialize` to prevent XSS.
|
||||
|
||||
### Ready
|
||||
Displays 5 feature cards (Tray Menu, Channels, Voice, Canvas, Skills) with localized subtitles. Includes a "Launch at Login" toggle and a "Finish" button that saves settings and closes the wizard.
|
||||
|
||||
## Security
|
||||
|
||||
The onboarding wizard follows these security practices:
|
||||
|
||||
- **XSS prevention**: Bootstrap messages encoded via `JsonSerializer.Serialize` for safe JS injection
|
||||
- **Input validation**: Setup codes limited to 2KB, decoded JSON validated, gateway URLs checked via `GatewayUrlHelper`
|
||||
- **URI scheme allowlists**: Only `ms-settings:` for permissions, `http/https` for chat
|
||||
- **Navigation restriction**: WebView2 `NavigationStarting` handler blocks navigation to external origins
|
||||
- **Token protection**: Query params stripped from all log output; WebView2 accelerator keys disabled
|
||||
- **Gateway-owned pairing**: Device approval uses the gateway CLI/API path so scope checks, token issuance, audit, and broadcasts stay centralized
|
||||
- **Error sanitization**: Exception details logged but not shown to users
|
||||
|
||||
## Localization
|
||||
|
||||
All user-visible strings use `LocalizationHelper.GetString()` with the `Onboarding_*` key namespace. Supported languages are discovered from the `Strings/<locale>/Resources.resw` directories; the current locales are English, French, Dutch, Chinese Simplified, and Chinese Traditional.
|
||||
|
||||
Translations are AI-generated following the repo convention. Technical terms (Gateway, Token, Node Mode) are kept in English across all locales.
|
||||
|
||||
## Developer Guide
|
||||
|
||||
See [DEVELOPMENT.md](../DEVELOPMENT.md#developing--testing-the-onboarding-wizard) for build instructions, environment variables, and testing workflow.
|
||||
|
||||
### Test Isolation
|
||||
|
||||
`SettingsManager` loads `%APPDATA%\OpenClawTray\settings.json` by default. Onboarding tests must not use `new SettingsManager()` without an isolated settings directory, because local user settings such as `EnableNodeMode=true` change page ordering by intentionally skipping operator-only Wizard and Chat pages.
|
||||
|
||||
Use a temp settings directory for tests that construct `SettingsManager`, or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts.
|
||||
|
||||
### Key Files
|
||||
|
||||
| Path | Purpose |
|
||||
|------|---------|
|
||||
| `Onboarding/OnboardingWindow.cs` | Host window with WebView2 overlay |
|
||||
| `Onboarding/OnboardingApp.cs` | Functional UI root component, page navigation |
|
||||
| `Onboarding/Services/OnboardingState.cs` | Shared state across all pages |
|
||||
| `Onboarding/Pages/*.cs` | Individual wizard screens |
|
||||
| `Onboarding/Services/SetupCodeDecoder.cs` | Base64url setup code parsing |
|
||||
| `Onboarding/Services/InputValidator.cs` | Security input validation |
|
||||
| `Onboarding/Services/WizardStepParser.cs` | Wizard JSON step parsing |
|
||||
| `Onboarding/Services/LocalGatewayApprover.cs` | Local gateway URL classification |
|
||||
| `Onboarding/Services/PermissionChecker.cs` | Windows permission checks |
|
||||
| `Helpers/GatewayChatHelper.cs` | Shared WebView2 chat URL builder |
|
||||
@ -35,33 +35,9 @@ Open Command Palette (`Win+Alt+Space`), type **"OpenClaw"** — you should see t
|
||||
| Command | Action |
|
||||
|---------|--------|
|
||||
| **🦞 Open Dashboard** | Opens the OpenClaw web dashboard in your default browser |
|
||||
| **💬 Dashboard: Sessions** | Opens the sessions dashboard |
|
||||
| **📡 Dashboard: Channels** | Opens the channel configuration dashboard |
|
||||
| **🧩 Dashboard: Skills** | Opens the skills dashboard |
|
||||
| **⏱️ Dashboard: Cron** | Opens the scheduled jobs dashboard |
|
||||
| **💬 Web Chat** | Opens the embedded Chat page in OpenClaw Tray |
|
||||
| **💬 Web Chat** | Opens the embedded Web Chat window in OpenClaw Tray |
|
||||
| **📝 Quick Send** | Opens the Quick Send dialog to compose a message |
|
||||
| **🧭 Setup Wizard** | Opens QR, setup code, and manual gateway pairing |
|
||||
| **🧭 Command Center** | Opens gateway, tunnel, node, browser, and support diagnostics |
|
||||
| **🔄 Run Health Check** | Refreshes gateway or node connection health |
|
||||
| **⬇️ Check for Updates** | Runs a manual GitHub Releases update check |
|
||||
| **⚡ Activity Stream** | Opens recent tray activity and support bundle actions |
|
||||
| **📋 Notification History** | Opens recent OpenClaw tray notifications in the Activity page |
|
||||
| **⚙️ Settings** | Opens the OpenClaw Tray Settings page |
|
||||
| **📄 Open Log File** | Opens the current OpenClaw Tray log |
|
||||
| **📁 Open Logs Folder** | Opens the OpenClaw Tray logs folder |
|
||||
| **🗂️ Open Config Folder** | Opens the OpenClaw Tray configuration folder |
|
||||
| **🧪 Open Diagnostics Folder** | Opens the diagnostics JSONL folder |
|
||||
| **📋 Copy Support Context** | Copies redacted Command Center support metadata |
|
||||
| **🧰 Copy Debug Bundle** | Copies combined support, port, capability, node, channel, and activity diagnostics |
|
||||
| **🌐 Copy Browser Setup** | Copies browser.proxy and node-host setup guidance |
|
||||
| **🔌 Copy Port Diagnostics** | Copies gateway/browser/tunnel port owners and stop hints |
|
||||
| **🛡️ Copy Capability Diagnostics** | Copies permission, allowlist, and parity diagnostics |
|
||||
| **🖥️ Copy Node Inventory** | Copies node capabilities, commands, and policy status |
|
||||
| **📡 Copy Channel Summary** | Copies channel health and start/stop availability |
|
||||
| **⚡ Copy Activity Summary** | Copies recent tray activity |
|
||||
| **🧩 Copy Extensibility Summary** | Copies channel, skills, and cron surface guidance |
|
||||
| **🔁 Restart SSH Tunnel** | Restarts the tray-managed SSH tunnel when enabled |
|
||||
| **⚙️ Settings** | Opens the OpenClaw Tray Settings dialog |
|
||||
|
||||
## Usage
|
||||
|
||||
@ -111,30 +87,6 @@ Get-AppxPackage -Name '*OpenClaw*' | Remove-AppxPackage
|
||||
| Command | Deep link |
|
||||
|---------|-----------|
|
||||
| Open Dashboard | `openclaw://dashboard` |
|
||||
| Dashboard: Sessions | `openclaw://dashboard/sessions` |
|
||||
| Dashboard: Channels | `openclaw://dashboard/channels` |
|
||||
| Dashboard: Skills | `openclaw://dashboard/skills` |
|
||||
| Dashboard: Cron | `openclaw://dashboard/cron` |
|
||||
| Web Chat | `openclaw://chat` |
|
||||
| Quick Send | `openclaw://send` |
|
||||
| Setup Wizard | `openclaw://setup` |
|
||||
| Command Center | `openclaw://commandcenter` |
|
||||
| Run Health Check | `openclaw://healthcheck` |
|
||||
| Check for Updates | `openclaw://check-updates` |
|
||||
| Activity Stream | `openclaw://activity` |
|
||||
| Notification History | `openclaw://history` |
|
||||
| Settings | `openclaw://settings` |
|
||||
| Open Log File | `openclaw://logs` |
|
||||
| Open Logs Folder | `openclaw://log-folder` |
|
||||
| Open Config Folder | `openclaw://config` |
|
||||
| Open Diagnostics Folder | `openclaw://diagnostics` |
|
||||
| Copy Support Context | `openclaw://support-context` |
|
||||
| Copy Debug Bundle | `openclaw://debug-bundle` |
|
||||
| Copy Browser Setup | `openclaw://browser-setup` |
|
||||
| Copy Port Diagnostics | `openclaw://port-diagnostics` |
|
||||
| Copy Capability Diagnostics | `openclaw://capability-diagnostics` |
|
||||
| Copy Node Inventory | `openclaw://node-inventory` |
|
||||
| Copy Channel Summary | `openclaw://channel-summary` |
|
||||
| Copy Activity Summary | `openclaw://activity-summary` |
|
||||
| Copy Extensibility Summary | `openclaw://extensibility-summary` |
|
||||
| Restart SSH Tunnel | `openclaw://restart-ssh-tunnel` |
|
||||
|
||||
@ -1,52 +0,0 @@
|
||||
# Releasing OpenClaw Windows Hub
|
||||
|
||||
This repo uses **GitVersion + CI** for release versioning.
|
||||
The canonical release flow is **tag-driven**, not manual file patching.
|
||||
|
||||
## TL;DR
|
||||
|
||||
1. Merge approved changes into `master`.
|
||||
2. Create and push a semantic tag:
|
||||
```powershell
|
||||
git checkout master
|
||||
git pull --ff-only origin master
|
||||
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
||||
git push origin master
|
||||
git push origin vX.Y.Z
|
||||
```
|
||||
3. CI (`.github/workflows/ci.yml`) builds/signs/publishes artifacts and creates the GitHub release from that tag.
|
||||
|
||||
## Why this is the correct flow
|
||||
|
||||
- `GitVersion.yml` is configured for `ContinuousDelivery` with `tag-prefix: 'v'`.
|
||||
- CI computes version from git history/tags and passes it to builds (`-p:Version=...`).
|
||||
- CI patches MSIX manifest version during build, so releases are consistent across EXE/MSIX assets.
|
||||
|
||||
## Important rules
|
||||
|
||||
- **Do not manually bump** version files for routine releases:
|
||||
- `src/OpenClaw.Tray/OpenClaw.Tray.csproj`
|
||||
- `src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj`
|
||||
- `src/OpenClaw.Tray.WinUI/Package.appxmanifest`
|
||||
- Treat csproj `<Version>` as a **local fallback** for dev builds.
|
||||
- Release versions should come from the **tag** (`vX.Y.Z`).
|
||||
|
||||
## Verify release pipeline
|
||||
|
||||
After pushing a tag, confirm in GitHub Actions:
|
||||
- workflow: **Build and Test**
|
||||
- trigger ref: `refs/tags/vX.Y.Z`
|
||||
- jobs complete successfully (build, build-msix, release)
|
||||
- release assets are attached to the tag release
|
||||
|
||||
## If you need to retag
|
||||
|
||||
If a tag points to the wrong commit:
|
||||
|
||||
```powershell
|
||||
git tag -d vX.Y.Z
|
||||
git push origin :refs/tags/vX.Y.Z
|
||||
git tag -a vX.Y.Z -m "Release vX.Y.Z"
|
||||
git push origin vX.Y.Z
|
||||
```
|
||||
|
||||
@ -43,35 +43,16 @@ After the installer finishes, OpenClaw Tray starts automatically. Look for the
|
||||
|
||||
If you don't see it, check the **hidden icons** area (the `^` arrow next to the tray).
|
||||
|
||||
### 5. Onboarding Wizard
|
||||
### 5. Configure the Connection
|
||||
|
||||
On first launch, Molty opens a **6-screen onboarding wizard** that walks you through setup:
|
||||
On first launch, a **Welcome** dialog appears. Click **Open Settings** to configure:
|
||||
|
||||
1. **Welcome** — A friendly greeting introducing OpenClaw and Molty. Click **Get Started** to begin.
|
||||
| Setting | What to enter |
|
||||
|---------|--------------|
|
||||
| **Gateway URL** | `ws://localhost:18789` (if running OpenClaw locally) or your remote gateway address |
|
||||
| **Token** | Your OpenClaw API token from [openclaw.ai](https://openclaw.ai) |
|
||||
|
||||
2. **Connection** — Choose how to connect to your gateway:
|
||||
- **Local** — Select this if the gateway runs on the same machine or in WSL. The URL is pre-filled to `ws://localhost:18789`.
|
||||
- **Remote** — Enter your gateway URL and bootstrap token manually, **or** paste a base64url-encoded **setup code** (a single string containing both URL and token).
|
||||
- **Later** — Skip connection setup for now. You can configure it later from the tray menu → Settings.
|
||||
|
||||
After entering your details, click **Test Connection**. The wizard performs a real WebSocket handshake with Ed25519 device authentication and shows real-time status feedback (connecting → connected → pairing).
|
||||
|
||||
3. **Wizard** — If your gateway supports it, this screen walks you through gateway-driven configuration steps (AI provider selection, personality setup, communication channels). The steps are defined by your gateway via RPC. If the gateway doesn't support wizard mode, this screen is skipped automatically.
|
||||
|
||||
4. **Permissions** — Reviews Windows system permissions needed for full functionality:
|
||||
- **Notifications** — for toast alerts
|
||||
- **Camera** — for camera capture
|
||||
- **Microphone** — for voice input
|
||||
- **Screen Capture** — for screenshots
|
||||
- **Location** — optional, for location-aware features; packaged installs declare this capability so Windows may prompt for location consent the first time it is used
|
||||
|
||||
Each permission shows its current status. Click **Open Settings** next to any permission to jump directly to the relevant Windows Settings page.
|
||||
|
||||
5. **Chat** — Meet your agent! This screen opens a live chat powered by the gateway's web UI. A bootstrap message is sent automatically to kick off your first conversation.
|
||||
|
||||
6. **Ready** — A summary of available features (tray menu, channels, voice, canvas, skills). Toggle **Launch at Login** to start Molty with Windows, then click **Finish** to complete setup.
|
||||
|
||||
After the wizard, the tray icon turns green when connected. You can re-run the wizard or change settings anytime from the tray menu.
|
||||
Click **Save**. Molty will connect to the gateway and the tray icon will turn green when connected.
|
||||
|
||||
## Tray Icon Status
|
||||
|
||||
@ -91,35 +72,11 @@ OpenClaw Tray responds to `openclaw://` deep links, which can be invoked from a
|
||||
| Link | Action |
|
||||
|------|--------|
|
||||
| `openclaw://dashboard` | Open the OpenClaw web dashboard |
|
||||
| `openclaw://dashboard/sessions` | Open the sessions dashboard page |
|
||||
| `openclaw://dashboard/channels` | Open the channels dashboard page |
|
||||
| `openclaw://dashboard/skills` | Open the skills dashboard page |
|
||||
| `openclaw://dashboard/cron` | Open the cron dashboard page |
|
||||
| `openclaw://chat` | Open the embedded Chat page |
|
||||
| `openclaw://chat` | Open the embedded Web Chat window |
|
||||
| `openclaw://send` | Open the Quick Send dialog |
|
||||
| `openclaw://send?message=Hello` | Open Quick Send with pre-filled text |
|
||||
| `openclaw://settings` | Open the Settings page |
|
||||
| `openclaw://setup` | Open the Setup Wizard |
|
||||
| `openclaw://commandcenter` | Open Command Center diagnostics |
|
||||
| `openclaw://activity` | Open the Activity page |
|
||||
| `openclaw://history` | Open the Activity page filtered to notification history |
|
||||
| `openclaw://healthcheck` | Run a manual health check |
|
||||
| `openclaw://check-updates` | Run a manual update check |
|
||||
| `openclaw://logs` | Open the current tray log file |
|
||||
| `openclaw://log-folder` | Open the logs folder |
|
||||
| `openclaw://config` | Open the config folder |
|
||||
| `openclaw://diagnostics` | Open the diagnostics JSONL folder |
|
||||
| `openclaw://support-context` | Copy redacted support context |
|
||||
| `openclaw://debug-bundle` | Copy a combined debug bundle for support |
|
||||
| `openclaw://browser-setup` | Copy browser.proxy/browser-control setup guidance |
|
||||
| `openclaw://port-diagnostics` | Copy gateway/browser/tunnel port diagnostics with owner PID stop hints |
|
||||
| `openclaw://capability-diagnostics` | Copy permissions, allowlist, and parity diagnostics |
|
||||
| `openclaw://node-inventory` | Copy node capabilities, commands, and policy status |
|
||||
| `openclaw://channel-summary` | Copy channel health and start/stop availability |
|
||||
| `openclaw://activity-summary` | Copy recent tray activity for troubleshooting |
|
||||
| `openclaw://extensibility-summary` | Copy channel, skills, and cron dashboard surface guidance |
|
||||
| `openclaw://restart-ssh-tunnel` | Restart the tray-managed SSH tunnel when enabled |
|
||||
| `openclaw://agent?message=Hello` | Send a message directly to the connected gateway |
|
||||
| `openclaw://settings` | Open the Settings dialog |
|
||||
| `openclaw://agent?message=Hello` | Send a message directly (with confirmation) |
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
@ -150,26 +107,6 @@ openclaw devices approve <device-id>
|
||||
|
||||
See [issue #81](https://github.com/openclaw/openclaw-windows-node/issues/81) for context on this flow.
|
||||
|
||||
### Setup code doesn't work
|
||||
|
||||
- Make sure you paste the **entire** setup code — it's a single base64url-encoded string.
|
||||
- Check for accidental leading/trailing whitespace.
|
||||
- The code must be from a compatible gateway version. Try entering the gateway URL and token manually instead.
|
||||
|
||||
### Connection test fails
|
||||
|
||||
- Verify the gateway URL is correct (e.g., `ws://localhost:18789` for local, or the full URL for remote).
|
||||
- Check that your token is valid and hasn't expired.
|
||||
- If the gateway is on another machine, ensure Windows Firewall allows traffic on the gateway port.
|
||||
- See the log at `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log` for detailed error messages.
|
||||
|
||||
### Wizard shows "offline"
|
||||
|
||||
The Wizard screen relies on the gateway's wizard protocol. If it shows offline:
|
||||
- The gateway may not support wizard mode yet — this is fine, configuration can be done later.
|
||||
- Check that the gateway is running and reachable.
|
||||
- You can skip the Wizard screen and configure your gateway manually from the tray menu → Settings.
|
||||
|
||||
### Settings are not saved
|
||||
|
||||
Settings are stored at `%APPDATA%\OpenClawTray\settings.json`. If this file is corrupt, delete it and reconfigure from scratch.
|
||||
|
||||
@ -1,17 +1,17 @@
|
||||
# Test Coverage Summary
|
||||
|
||||
**1570 tests total** (1182 shared + 388 tray) — all passing ✅
|
||||
**571 tests total** (478 shared + 93 tray) — all passing ✅
|
||||
|
||||
| Metric | Value |
|
||||
|--------|-------|
|
||||
| Total Tests | 1570 |
|
||||
| Passing | 1570 (100%) |
|
||||
| Total Tests | 571 |
|
||||
| Passing | 571 (100%) |
|
||||
| Failing | 0 |
|
||||
| Framework | xUnit 2.9.3 / .NET 10.0 |
|
||||
|
||||
## Test Projects
|
||||
|
||||
### OpenClaw.Shared.Tests — 1182 tests
|
||||
### OpenClaw.Shared.Tests — 478 tests
|
||||
|
||||
#### ModelsTests
|
||||
- **AgentActivityTests** (~15) — glyph mapping for all ActivityKind values, display text formatting
|
||||
@ -71,26 +71,29 @@
|
||||
|
||||
---
|
||||
|
||||
### OpenClaw.Tray.Tests — 388 tests
|
||||
### OpenClaw.Tray.Tests — 93 tests
|
||||
|
||||
#### Core Tray Tests
|
||||
#### MenuDisplayHelperTests (~40)
|
||||
- `GetStatusIcon` — emoji mapping for Connected/Disconnected/Connecting/Error states
|
||||
- `GetChannelStatusIcon` — status icons for running/idle/pending/error/disconnected + case-insensitive variants
|
||||
- `GetNextToggleValue` — ON↔OFF toggling, case handling
|
||||
- Unknown/empty status fallback
|
||||
|
||||
- **MenuDisplayHelperTests** (~40) — `GetStatusIcon` emoji mapping for Connected/Disconnected/Connecting/Error states, `GetChannelStatusIcon` status icons for running/idle/pending/error/disconnected + case-insensitive variants, `GetNextToggleValue` ON↔OFF toggling, unknown/empty status fallback
|
||||
- **MenuPositionerTests** (~15) — Screen edge clamping (top-left, bottom-right), taskbar-at-right scenario, menu positioning relative to cursor
|
||||
- **SettingsRoundTripTests** (~15) — Serialization/deserialization round trips, default values on missing keys, backward compatibility with older settings formats
|
||||
- **DeepLinkParserTests** (~23) — `ParseDeepLink` protocol validation, null/empty handling, subpath parsing, trailing slash stripping, query parameter extraction, URL-encoded message handling
|
||||
#### MenuPositionerTests (~15)
|
||||
- Screen edge clamping (top-left, bottom-right)
|
||||
- Taskbar-at-right scenario
|
||||
- Menu positioning relative to cursor
|
||||
|
||||
#### Onboarding Tests
|
||||
#### SettingsRoundTripTests (~15)
|
||||
- Serialization/deserialization round trips
|
||||
- Default values on missing keys
|
||||
- Backward compatibility with older settings formats
|
||||
|
||||
- **OnboardingStateTests** (19) — Page order, mode logic, route changes, wizard state persistence, completion, disposal
|
||||
- **GatewayChatHelperTests** (11) — URL scheme conversion, token encoding, localhost checks, session keys
|
||||
- **LocalGatewayApproverTests** (13) — IsLocalGateway for localhost/remote/edge cases
|
||||
- **SetupCodeDecoderTests** (14) — Base64url decode, size limits, JSON validation, URL/token extraction
|
||||
- **GatewayHealthCheckTests** (6) — Health URI building, scheme conversion, port preservation
|
||||
- **SecurityValidationTests** (16) — Locale whitelist, port range, path traversal, URI scheme validation
|
||||
- **WizardStepParsingTests** (12) — JSON step parsing, options, completion, sensitive fields
|
||||
- **GatewayDiscoveryServiceTests** — mDNS host selection and connection URL regression coverage
|
||||
- **LocalizationValidationTests** — locale key parity, onboarding key presence, duplicate detection, and all-or-none translation consistency
|
||||
#### DeepLinkParserTests (~23)
|
||||
- `ParseDeepLink` — protocol validation, null/empty handling, subpath parsing, trailing slash stripping
|
||||
- Query parameter extraction (`GetQueryParam`)
|
||||
- URL-encoded message handling
|
||||
- Multiple query parameters, missing keys
|
||||
|
||||
---
|
||||
|
||||
@ -107,9 +110,6 @@ dotnet test tests/OpenClaw.Tray.Tests
|
||||
# Specific test class
|
||||
dotnet test --filter "FullyQualifiedName~MenuDisplayHelperTests"
|
||||
|
||||
# Onboarding tests only
|
||||
dotnet test --filter "FullyQualifiedName~Onboarding"
|
||||
|
||||
# Verbose output
|
||||
dotnet test --logger "console;verbosity=detailed"
|
||||
```
|
||||
@ -120,10 +120,9 @@ dotnet test --logger "console;verbosity=detailed"
|
||||
- Real gateway message parsing
|
||||
- Concurrent event handling
|
||||
- File I/O and thread synchronization
|
||||
- End-to-end onboarding wizard flow (WebView2 requires runtime)
|
||||
|
||||
---
|
||||
|
||||
**Last Updated**: 2026-05-04
|
||||
**Last Updated**: 2026-03-18
|
||||
**Framework**: xUnit 2.9.3 / .NET 10.0
|
||||
**Status**: ✅ 1570 tests passing
|
||||
**Status**: ✅ 571 tests passing
|
||||
|
||||
@ -152,7 +152,7 @@ This operator-only mode provides Quick Send, embedded WebChat, Command Center di
|
||||
|--------|---------|
|
||||
| **Gateway** | WSL2 (Ubuntu) |
|
||||
| **Nodes** | OpenClaw.Tray registers as `role: "node"` from Windows |
|
||||
| **Capabilities** | Camera ✅ (MediaCapture API) Canvas ✅ (WebView2) Screen ✅ (Graphics Capture) Notifications ✅ (Toast + agent-driven) Browser ✅/⚠️ (local `browser.proxy` bridge; requires browser-control host on gateway port + 2) Exec ✅ (WSL2 + optionally Windows `cmd`/`powershell`) Location ⚠️ (Windows Location API — desktop, less useful) Voice/TTS ⚠️ (separate parity track) |
|
||||
| **Capabilities** | Camera ✅ (MediaCapture API) Canvas ✅ (WebView2) Screen ✅ (Graphics Capture) Notifications ✅ (Toast + agent-driven) Browser ❌ (WSL2 browser proxy) Exec ✅ (WSL2 + optionally Windows `cmd`/`powershell`) Location ⚠️ (Windows Location API — desktop, less useful) Audio/TTS ✅ (Windows Speech) |
|
||||
| **Networking** | WSL2 NAT still involved for gateway, but tray app connects outward to WSL2's WS — simpler direction. |
|
||||
| **Setup complexity** | Medium — WSL2 gateway + tray app auto-discovers and pairs |
|
||||
| **UX Rating** | ⭐⭐⭐⭐ Agent can now see and interact with Windows! |
|
||||
@ -169,7 +169,7 @@ The tray now also has a Command Center surface that combines gateway channel hea
|
||||
|--------|---------|
|
||||
| **Gateway** | Windows native (Node.js on Windows — `node.exe`) |
|
||||
| **Nodes** | OpenClaw.Tray as full Windows node |
|
||||
| **Capabilities** | Camera ✅ Canvas ✅ Screen ✅ Notifications ✅ Browser ✅/⚠️ (`browser.proxy` bridge; needs browser-control host on gateway+2) Exec ✅ (native `cmd.exe`, PowerShell, `wsl.exe`) Location ⚠️ Voice/TTS ⚠️ (separate parity track) |
|
||||
| **Capabilities** | Camera ✅ Canvas ✅ Screen ✅ Notifications ✅ Browser ✅ (Playwright on Windows) Exec ✅ (native `cmd.exe`, PowerShell, `wsl.exe`) Location ⚠️ Audio/TTS ✅ |
|
||||
| **Networking** | `ws://127.0.0.1:18789` — pure loopback, no NAT, no WSL2 networking issues |
|
||||
| **Setup complexity** | Low — `npm install -g openclaw && openclaw onboard` from PowerShell. Same as Mac. |
|
||||
| **UX Rating** | ⭐⭐⭐⭐⭐ True feature parity with Mac |
|
||||
@ -261,9 +261,9 @@ Niche scenario. If the "server" must be Windows for some reason, this works but
|
||||
| `location.get` | ✅ CLLocationManager | ✅ CLLocationManager | ✅ FusedLocation | ❌ | **✅** | Windows.Devices.Geolocation |
|
||||
| `device.info/status` | ✅ shared schema | ✅ shared schema | ✅ shared schema | ❌ | **✅** | .NET runtime, storage, network |
|
||||
| `sms.send` | ❌ | ❌ | ✅ | ❌ | ❌ | N/A |
|
||||
| Browser proxy | ✅ | ❌ | ❌ | ✅ Playwright | **✅/⚠️ Local bridge** | Browser-control host on gateway port + 2 |
|
||||
| Browser proxy | ✅ | ❌ | ❌ | ✅ Playwright | **⚠️ Future** | Playwright on Windows |
|
||||
| Accessibility | ✅ AX API | ❌ | ❌ | ❌ | **⚠️ Future** | UI Automation |
|
||||
| Speech/TTS | ✅ NSSpeechSynthesizer | ❌ | ❌ | ❌ | **⚠️ Planned** | Windows.Media.SpeechSynthesis |
|
||||
| Speech/TTS | ✅ NSSpeechSynthesizer | ❌ | ❌ | ❌ | **✅** | Windows.Media.SpeechSynthesis |
|
||||
| Microphone | ✅ AVAudioEngine | ✅ | ✅ | ❌ | **⚠️ Future** | Windows.Media.Audio |
|
||||
|
||||
---
|
||||
@ -292,7 +292,7 @@ The tray app uses a dedicated node connection (`WindowsNodeClient`) with `role:
|
||||
},
|
||||
"role": "node",
|
||||
"scopes": [],
|
||||
"caps": ["canvas", "camera", "screen", "notifications", "system", "device", "browser"],
|
||||
"caps": ["canvas", "camera", "screen", "notifications", "system", "device"],
|
||||
"commands": [
|
||||
"canvas.present", "canvas.hide", "canvas.navigate",
|
||||
"canvas.eval", "canvas.snapshot", "canvas.a2ui.push",
|
||||
@ -302,8 +302,7 @@ The tray app uses a dedicated node connection (`WindowsNodeClient`) with `role:
|
||||
"location.get",
|
||||
"device.info", "device.status",
|
||||
"system.run", "system.run.prepare", "system.which", "system.notify",
|
||||
"system.execApprovals.get", "system.execApprovals.set",
|
||||
"browser.proxy"
|
||||
"system.execApprovals.get", "system.execApprovals.set"
|
||||
],
|
||||
"permissions": {
|
||||
"camera.capture": true,
|
||||
@ -476,10 +475,6 @@ var stream = await synth.SynthesizeTextToStreamAsync(text);
|
||||
// Play via MediaElement or save to file
|
||||
```
|
||||
|
||||
This is a candidate implementation path, not an implemented node command yet. Voice/Talk mode parity should stay on its own track so Windows does not advertise a speech capability before there is a shared command contract and permission model.
|
||||
|
||||
Current PR review status: open PR #120 (`feature/voice-mode`) is a useful prototype but should not merge as-is. It currently conflicts with the active capability-settings branch, advertises `voice.*` commands without the default-off Settings gate used for other privacy-sensitive capability groups, widens operator scopes in the same PR, persists cloud TTS provider keys in plain settings JSON, and introduces a Windows-specific wire schema before the Mac runtime/controller/session contract is agreed. Safe next step: split schema, gateway scope, chat transport, Windows runtime, WebChat integration, and cloud-provider credentials into separate reviews; keep the first merge behind a default-off Voice Settings group and gateway dangerous-command allowlist.
|
||||
|
||||
---
|
||||
|
||||
## Architectural Questions
|
||||
@ -524,7 +519,7 @@ On macOS: launchd plist. On Linux: systemd unit. On Windows, options include:
|
||||
- **Startup folder** (simplest, least robust)
|
||||
- **Tray app manages gateway process** (like macOS menubar app can start/stop gateway)
|
||||
|
||||
The Mac menubar app has "Gateway start/stop/restart" in its menu. Windows Command Center can restart a tray-managed SSH tunnel, but it intentionally does not stop or kill externally managed gateway processes. If the gateway runs as a future Windows-managed process, the tray app could add explicit start/stop/restart controls for that owned process.
|
||||
The Mac menubar app has "Gateway start/stop/restart" in its menu. The tray app has this marked as ❌ in the parity table. If the gateway runs on Windows, the tray app could manage it.
|
||||
|
||||
### 4. WSL2 networking: the NAT problem
|
||||
|
||||
@ -579,7 +574,7 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
- [x] `device.info` / `device.status` — metadata and lightweight status payloads
|
||||
- [x] `system.run` — exec commands on Windows (PowerShell/cmd) with ICommandRunner abstraction
|
||||
- [x] `system.execApprovals.get/set` — remote-manageable exec approval policy
|
||||
- [x] Settings UI for node capabilities (enable/disable canvas, screen, camera, location, browser proxy)
|
||||
- [ ] Settings UI for node capabilities (enable/disable camera, screen, etc.)
|
||||
- [x] Resolve #9 (WebView2 ARM64) — required for canvas
|
||||
|
||||
**Depends on:** #5 (Canvas Panel), #9 (WebView2 ARM64)
|
||||
@ -612,12 +607,10 @@ The node protocol requires a stable device identity (`device.id`) derived from a
|
||||
- [x] `location.get` — Windows Location API
|
||||
- [ ] TTS / Speech Synthesis
|
||||
- [ ] Microphone / voice input
|
||||
- [x] `browser.proxy` — local browser-control bridge on gateway port + 2, including SSH companion-forward diagnostics
|
||||
- [x] Browser-control host setup guidance and local host runtime smoke for end-to-end browser smoke tests
|
||||
- [ ] Bundled browser-control host installer/launcher
|
||||
- [ ] Browser proxy (Playwright on Windows, launched by tray app)
|
||||
- [ ] UI Automation (Windows equivalent of macOS Accessibility API)
|
||||
- [ ] Auto-update improvements (auto-update currently pulls from GitHub Releases; move to MSI/MSIX?)
|
||||
- [x] PowerToys Command Palette integration for Command Center diagnostics entrypoint
|
||||
- [ ] PowerToys Command Palette integration for node commands
|
||||
|
||||
---
|
||||
|
||||
@ -687,18 +680,18 @@ This is a big effort and **contributions are very welcome!** Here's how to get s
|
||||
|
||||
### Good First Issues
|
||||
|
||||
1. **Capability diagnostics copy** — ✅ Command Center can copy a summary of declared commands, gateway allowlist status, and dangerous-command opt-ins.
|
||||
1. **Capability diagnostics copy** — Add a copyable summary that explains declared commands, gateway allowlist status, and dangerous-command opt-ins.
|
||||
2. **Gateway health summary** — Show version, update state, auth state, and active connection health in one panel.
|
||||
3. **Channel status cards** — Surface configured/running/error/probe state for channels.
|
||||
|
||||
### Medium Issues
|
||||
|
||||
4. **Browser proxy parity** — Windows now includes a Mac-compatible local `browser.proxy` bridge to the browser control host on gateway port + 2, and managed SSH tunnel mode forwards local+2 to remote+2 when the browser proxy capability is enabled; continue hardening live browser-host setup guidance and diagnostics.
|
||||
4. **Browser proxy parity** — Investigate a safe Windows implementation for Mac-compatible `browser.proxy`.
|
||||
5. **Gateway/channel flyout** — Show configured/running/error/probe state for channels and gateway health in the tray.
|
||||
|
||||
### Harder Issues
|
||||
|
||||
6. **Voice mode parity** — PR #120 has been reviewed and should stay blocked until it is rebased/split, gated default-off through Settings, aligned with a shared Mac/gateway voice command contract, and hardened for credential storage and permission prompts.
|
||||
6. **Voice mode parity** — Review the open Windows Voice Mode PR against the current Mac voice runtime/controller/session split.
|
||||
7. **Native Windows gateway audit** — Run `openclaw gateway` on Windows, identify and fix platform-specific failures.
|
||||
8. **Richer channel operations** — Add tray surfaces for channel configuration, probe status, token source, last error, and recovery actions.
|
||||
|
||||
@ -718,9 +711,9 @@ Requires .NET 10.0 SDK, Windows 10/11. For testing node protocol, you'll need a
|
||||
|
||||
## Open Questions
|
||||
|
||||
- [x] Should dangerous command opt-ins be shown in the tray as a guided repair flow, a docs link, or both? Command Center now shows copyable safety guidance but intentionally avoids one-click dangerous repair commands.
|
||||
- [ ] Should dangerous command opt-ins be shown in the tray as a guided repair flow, a docs link, or both?
|
||||
- [ ] How much channel management should live in the native tray versus opening the web dashboard?
|
||||
- [x] Should Voice Mode land as a separate parity track after the open PR is reviewed against current Mac architecture? Yes. PR #120 should not advertise voice commands from Windows until the shared contract, Settings gate, gateway allowlist, and credential-storage concerns are resolved.
|
||||
- [ ] Should Voice Mode land as a separate parity track after the open PR is reviewed against current Mac architecture?
|
||||
|
||||
---
|
||||
|
||||
|
||||
@ -37,7 +37,7 @@ The Windows Node feature allows the tray app to receive commands from the OpenCl
|
||||
### 4. Command Center
|
||||
- Open the tray status detail or launch `openclaw://commandcenter`
|
||||
- In Node Mode, verify the window shows gateway channel health from node `health` events plus a synthesized local Windows node when operator `node.list` is not connected
|
||||
- Check diagnostics for pairing approval, stale health, all-stopped channels, allowlist filtering, browser control host availability for `browser.proxy`, and usage-cost gaps
|
||||
- Check diagnostics for pairing approval, stale health, all-stopped channels, allowlist filtering, missing `browser.proxy` parity, and usage-cost gaps
|
||||
- Use "Copy fix" only for safe repair commands; privacy-sensitive commands remain informational unless you explicitly opt in on the gateway
|
||||
|
||||
## What Requires Gateway Support
|
||||
@ -54,15 +54,12 @@ These features need the gateway to send `node.invoke` commands:
|
||||
| `screen.snapshot` | Take screenshot | Captures screen, shows notification, returns base64 |
|
||||
| `screen.record` | Record short screen clip | Returns MP4/base64 metadata; requires explicit gateway allowlist |
|
||||
| `system.notify` | Show notification | Displays toast notification |
|
||||
| `system.run` / `system.which` | Controlled command execution | Uses local exec approval policy; `prompt` decisions show a Windows Allow once / Always allow / Deny dialog |
|
||||
| `system.run` / `system.which` | Controlled command execution | Uses local exec approval policy |
|
||||
| `camera.list` | Enumerate cameras | Returns device IDs and names |
|
||||
| `camera.snap` | Capture photo | Returns base64 image (NV12 fallback) |
|
||||
| `camera.clip` | Capture video clip | Returns MP4/base64 metadata |
|
||||
| `location.get` | Get Windows location | Uses Windows location permission/settings |
|
||||
| `device.info` / `device.status` | Device metadata/status | Returns host/app/locale plus battery/storage/network/uptime payloads |
|
||||
| `browser.proxy` | Proxy browser-control host requests | Requires Browser proxy bridge enabled, a compatible browser-control host listening on gateway port + 2, and matching browser-control auth |
|
||||
| `stt.transcribe` | Speech-to-text from default microphone | Default-off; bounded `maxDurationMs` ≤ 30000; concatenates phrases until duration elapses; requires explicit gateway allowlist |
|
||||
| `tts.speak` | Speak text aloud | Requires Text-to-speech playback enabled in Settings; gateway mode also requires `tts.speak` in `gateway.nodes.allowCommands` |
|
||||
|
||||
## Capabilities Advertised
|
||||
|
||||
@ -73,8 +70,6 @@ When the node connects, it advertises these capabilities:
|
||||
- `camera` - MediaCapture photo/video capture (frame reader fallback)
|
||||
- `location` - Windows.Devices.Geolocation
|
||||
- `device` - Host/app metadata and lightweight status
|
||||
- `browser` - Local `browser.proxy` bridge to a browser-control host on gateway port + 2, when enabled in Settings
|
||||
- `tts` - Windows speech synthesis or ElevenLabs playback, when enabled in Settings
|
||||
|
||||
## Security Features
|
||||
|
||||
@ -96,15 +91,6 @@ When the node connects, it advertises these capabilities:
|
||||
- Ensure Windows notifications are enabled for the app
|
||||
- Check if notification settings in the app are enabled
|
||||
|
||||
### `browser.proxy` reports no browser-control host
|
||||
- Confirm the Browser proxy bridge toggle is enabled in Settings, then save and reconnect or re-pair if the gateway keeps an older command snapshot.
|
||||
- The bridge is local-only: it calls `http://127.0.0.1:<gateway-port+2>` from Windows. For a gateway on `ws://127.0.0.1:18789`, the browser-control host must listen on `127.0.0.1:18791`.
|
||||
- In managed SSH tunnel mode, keep Browser proxy bridge enabled so the tray forwards local gateway port + 2 to remote gateway port + 2. Settings shows a selectable preview of the exact `ssh -N -L ...` command.
|
||||
- If using a manual SSH tunnel, add both forwards, for example: `ssh -N -L 18789:127.0.0.1:18789 -L 18791:127.0.0.1:18791 <user>@<host>`. If local and remote gateway ports differ, forward `<local-gateway-port+2>` to `127.0.0.1:<remote-gateway-port+2>`.
|
||||
- A local SSH forward is not enough if the remote browser-control host is not running. Command Center port diagnostics should show whether the local gateway and browser-control ports are listening and which process owns them.
|
||||
- If Command Center shows the browser-control port listening but `browser.proxy` returns an auth error, verify the Windows Settings gateway token matches the browser-control host token/password. QR/bootstrap pairing can connect the node without saving a shared gateway token, but browser-control auth may still require one.
|
||||
- A local smoke test can verify the host dependency without proving gateway invoke auth: start the upstream browser-control host with a temporary no-secret config, confirm `http://127.0.0.1:<gateway-port+2>/` and `/tabs` return HTTP 200, then stop the captured host process. The full parity smoke test is not complete until `openclaw nodes invoke --command browser.proxy` succeeds through the active gateway.
|
||||
|
||||
### Canvas window doesn't appear
|
||||
- Check logs for `canvas.present` command received
|
||||
- Verify URL is not blocked by security validation
|
||||
@ -113,48 +99,13 @@ When the node connects, it advertises these capabilities:
|
||||
- If you see "Camera access blocked", enable camera access for desktop apps in Windows Privacy settings
|
||||
- Packaged MSIX builds will show the system consent prompt automatically
|
||||
|
||||
### `stt.transcribe` returns "Speech recognition failed" or "Internal Speech Error"
|
||||
- Open Windows Settings → Privacy & security → Speech (`ms-settings:privacy-speech`)
|
||||
- Set **Online speech recognition** to **On**. The Windows speech recognizer's default dictation grammar often fails without it, and Windows surfaces an unmapped HRESULT as "Internal Speech Error"
|
||||
- Open Windows Settings → Time & language → Language & region (`ms-settings:regionlanguage`), select your display language → Language options, and confirm **Speech** appears under Installed features (install it if not, ~50 MB; reboot or sign out/in afterward)
|
||||
- Verify the recognizer end-to-end with `ms-settings:speech` → "Microphone" → **Get started** before re-trying `stt.transcribe`
|
||||
|
||||
### `stt.transcribe` returns "Microphone permission denied"
|
||||
- Open Windows Settings → Privacy & security → Microphone
|
||||
- Ensure **Microphone access** (top-level toggle) is on
|
||||
- For **unpackaged** tray builds (the default `.\build.ps1` output): ensure **Let desktop apps access your microphone** is on. The tray exe will **not** appear as its own row — desktop-app access is granted as a group, not per-app
|
||||
- For **packaged MSIX** tray builds: the tray appears as its own entry under "Let apps access your microphone" and must be individually enabled (the OS shows a consent prompt on first use)
|
||||
- After changing permissions, re-pair the node so the gateway picks up the new advertised command
|
||||
|
||||
### `stt.transcribe` returns "Language pack 'X' is not installed"
|
||||
- Open Windows Settings → Time & language → Language & region
|
||||
- Add the requested display language and ensure the **Speech** optional feature is installed
|
||||
- Restart the tray after installing the speech pack
|
||||
|
||||
### Manual STT validation
|
||||
1. Enable Node Mode in Settings.
|
||||
2. Enable **Speech-to-text (microphone)** in Settings → Node mode.
|
||||
3. Append `stt.transcribe` to your existing gateway allowlist (do **not** copy a literal `...` — substitute the commands you already allow). For example, starting from the recommended Windows safe companion list:
|
||||
```bash
|
||||
openclaw config set gateway.nodes.allowCommands '["canvas.present","canvas.hide","canvas.navigate","canvas.eval","canvas.snapshot","canvas.a2ui.push","canvas.a2ui.pushJSONL","canvas.a2ui.reset","camera.list","location.get","screen.snapshot","device.info","device.status","system.execApprovals.get","system.execApprovals.set","stt.transcribe"]'
|
||||
openclaw gateway restart
|
||||
```
|
||||
4. Re-pair or re-approve the node so the gateway refreshes its command snapshot.
|
||||
5. Invoke and speak a short phrase:
|
||||
```bash
|
||||
openclaw nodes invoke --node <id> --command stt.transcribe \
|
||||
--params '{"maxDurationMs":5000,"language":"en-US"}'
|
||||
```
|
||||
6. The Windows microphone OS indicator should appear during recognition. Confirm a `transcribed:true` payload returns the text.
|
||||
|
||||
## Remaining Work (Roadmap)
|
||||
|
||||
1. ~~**system.run + exec approvals**~~ ✅ Implemented
|
||||
- `system.run` with PowerShell/cmd support
|
||||
- `system.run.prepare` pre-flight command
|
||||
- `system.which` command lookup
|
||||
- `system.execApprovals` allowlist flow with base-hash optimistic concurrency for remote edits
|
||||
- `system.run` environment override sanitizer blocks path/toolchain injection and secret-looking variables
|
||||
- `system.execApprovals` allowlist flow
|
||||
2. ~~**screen.record**~~ ✅ Implemented
|
||||
- Graphics Capture video recording (MP4/base64)
|
||||
3. ~~**camera.clip**~~ ✅ Implemented
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
# A2UI v0.8 — Overview & Implementation Grading
|
||||
|
||||
This folder is the entry point for everything A2UI in this repo. It captures
|
||||
the v0.8 specification, the standard catalog, and a side-by-side grading of
|
||||
two implementations:
|
||||
|
||||
- **Lit reference** in `C:\Users\andersonch\Code\openclaw` (web components,
|
||||
rendered in a browser via the OpenClaw canvas host).
|
||||
- **Native WinUI** in this repo (`src/OpenClaw.Tray.WinUI/A2UI/`,
|
||||
branch `feat/a2ui-native-winui`).
|
||||
|
||||
The native WinUI design doc that predates this overview lives at
|
||||
[`../A2UI_NATIVE_WINUI.md`](../A2UI_NATIVE_WINUI.md); this folder
|
||||
supersedes the parts of that doc that describe the spec and adds the
|
||||
grading.
|
||||
|
||||
## Contents
|
||||
|
||||
| Doc | What's in it |
|
||||
| --- | --- |
|
||||
| [`protocol.md`](./protocol.md) | Wire protocol — envelopes, JSONL, A2A extension, capability negotiation, lifecycle |
|
||||
| [`components.md`](./components.md) | Standard catalog — every component, every property, type, enum, behavior |
|
||||
| [`data-and-actions.md`](./data-and-actions.md) | A2UIValue tagged union, data model & paths, action dispatch, security |
|
||||
| [`grading.md`](./grading.md) | Side-by-side scoring of Lit vs WinUI vs spec, with file:line citations |
|
||||
|
||||
## Spec source of truth
|
||||
|
||||
| Document | URL |
|
||||
| --- | --- |
|
||||
| Protocol v0.8 | https://a2ui.org/specification/v0.8-a2ui/ |
|
||||
| A2A extension v0.8 | https://a2ui.org/specification/v0.8-a2a-extension/ |
|
||||
| Standard catalog (JSON) | https://a2ui.org/specification/v0_8/standard_catalog_definition.json |
|
||||
| Source / schemas | https://github.com/google/A2UI |
|
||||
| Evolution v0.8 → v0.9 | https://a2ui.org/specification/v0.9-evolution-guide/ |
|
||||
|
||||
These pages were captured 2026-04-27. v0.8 is the **stable / public preview**
|
||||
release; v0.9 exists as a draft.
|
||||
|
||||
## TL;DR — how the two implementations stack up
|
||||
|
||||
| Area | Lit (OpenClaw) | WinUI (this repo) | Spec |
|
||||
| --- | --- | --- | --- |
|
||||
| Component coverage | 18/18 | 18/18 | 18 in standard catalog |
|
||||
| Component property completeness | ~85% (4 documented TODOs) | ~95% | — |
|
||||
| Streaming / JSONL parser | Per-line, lenient | Per-line, lenient + size caps | line-delimited JSON |
|
||||
| Data model paths | Custom JSON-pointer-ish + auto-parse | Strict RFC 6901 | Path strings, format underspecified |
|
||||
| Action transport | DOM `CustomEvent` bubbling | Debounced dispatcher → gateway via `agent.request` | Client-to-server A2A `userAction` |
|
||||
| Bi-directional binding | ✓ via `processor.setData` | ✓ via `DataModelStore.Write` | Spec is silent — both impls add it |
|
||||
| Markdown in `Text` | ✓ (sandboxed iframe for HTML, escaped code) | ✗ (plain text only) | Spec is silent |
|
||||
| Modal | `<dialog>` w/ `showModal()` | `ContentDialog` (native) | Spec leaves shape open |
|
||||
| List virtualization | ✗ (StackPanel-style, all-at-once) | ✓ `ItemsRepeater` + cached child template | Spec calls for it |
|
||||
| URL safety / SSRF | None — passes URLs through to `<img>`/`<video>` | HTTPS+allowlist for `Image`/`Video`/`AudioPlayer`; DNS-rebinding pin via `SocketsHttpHandler.ConnectCallback` on `Image` only — `Video`/`AudioPlayer` hand the URI to `MediaSource.CreateFromUri`, which re-resolves at playback | Spec is silent (deferred) |
|
||||
| Secret redaction | ✗ | ✓ denylist (`password`, `secret`, `token`) + registered paths | Spec is silent |
|
||||
| Action context scoping | Caller's responsibility | Explicit `dataBinding` + implicit walk + secret filter | Spec defines `context[]` only |
|
||||
| Test coverage | One model unit test; no per-component | Render matrix, scale test, security tests, integration smoke | — |
|
||||
|
||||
The detailed scorecard with deductions per category is in
|
||||
[`grading.md`](./grading.md).
|
||||
|
||||
## How to use this folder
|
||||
|
||||
- If you're **adding a renderer** for a component: read
|
||||
[`components.md`](./components.md) for the spec'd properties, then
|
||||
[`grading.md`](./grading.md) for known WinUI gaps.
|
||||
- If you're **wiring a transport** (gateway, MCP bridge, etc.): read
|
||||
[`protocol.md`](./protocol.md) and the `data-and-actions.md` action
|
||||
section.
|
||||
- If you're **reviewing a PR that touches A2UI**: skim
|
||||
[`grading.md#known-deviations-by-category`](./grading.md#known-deviations-by-category)
|
||||
to see which deviations are intentional (good) vs known gaps (bad).
|
||||
@ -1,323 +0,0 @@
|
||||
# A2UI v0.8 — Standard Catalog (Components)
|
||||
|
||||
Source of truth: <https://a2ui.org/specification/v0_8/standard_catalog_definition.json>.
|
||||
|
||||
The v0.8 standard catalog defines **18 components** across three loose
|
||||
categories: containers, display, interactive. A v0.8-conformant client
|
||||
MUST recognize all 18 and either render them or fall back to an "unknown"
|
||||
placeholder for catalog-strict mode.
|
||||
|
||||
Each section below is the spec — required properties first, optional
|
||||
after, with enums spelled out. Where the WinUI or Lit impl has a known gap
|
||||
or improvement, it's flagged inline so this doc doubles as a quick lookup
|
||||
when wiring a new component. Detailed grading is in
|
||||
[`grading.md`](./grading.md).
|
||||
|
||||
Notation: `BoundValue` means an [`A2UIValue`](./data-and-actions.md#a2uivalue)
|
||||
tagged union — typically `{ literalString }` or `{ path }`. `Children`
|
||||
means `{ explicitList: string[] }` or `{ template: { dataBinding, componentId } }`.
|
||||
|
||||
---
|
||||
|
||||
## Containers
|
||||
|
||||
### `Row`
|
||||
Horizontal layout container.
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `children` | `Children` | ✓ | `explicitList` or `template` |
|
||||
| `distribution` | enum | | `start` \| `center` \| `end` \| `spaceBetween` \| `spaceAround` \| `spaceEvenly` |
|
||||
| `alignment` | enum | | `start` \| `center` \| `end` \| `stretch` |
|
||||
|
||||
**Behavior**: lays children left-to-right; cross-axis = vertical alignment.
|
||||
|
||||
> WinUI: `StackPanel` (horizontal); `distribution` collapsed onto WinUI
|
||||
> `HorizontalAlignment` — `spaceBetween`/`spaceAround`/`spaceEvenly` all
|
||||
> map to `Stretch` (justify-content not natively available). Wrap to next
|
||||
> row not implemented.
|
||||
> Lit: full distribution support via CSS flex.
|
||||
|
||||
### `Column`
|
||||
Vertical layout container. Same property set as `Row`, swapping axes.
|
||||
|
||||
### `List`
|
||||
Scrollable list of children.
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `children` | `Children` | ✓ | |
|
||||
| `direction` | enum | | `vertical` (default) \| `horizontal` |
|
||||
| `alignment` | enum | | `start` \| `center` \| `end` \| `stretch` |
|
||||
|
||||
**Behavior**: virtualization-friendly; spec calls for the client to
|
||||
realize only viewport children when possible.
|
||||
|
||||
> WinUI: `ItemsRepeater` w/ `StackLayout`, virtualized, child-element
|
||||
> cache keyed by component id (preserves data-binding subscriptions
|
||||
> across recycling).
|
||||
> Lit: builds all children up-front (no virtualization).
|
||||
> Lit: `template` form for List is partially honored only because all
|
||||
> three list-bearing components share the same children resolver. WinUI
|
||||
> only supports `explicitList` today.
|
||||
|
||||
### `Card`
|
||||
Single-child container with elevation/border treatment.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `child` | component-id (string) | ✓ |
|
||||
|
||||
> WinUI: `Border` w/ `CardBackgroundFillColorDefaultBrush`,
|
||||
> `theme.CornerRadius`, padding = `theme.Spacing * 2`.
|
||||
> Lit: slot-based wrap; CSS-driven elevation.
|
||||
|
||||
### `Tabs`
|
||||
Tabbed container.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `tabItems[]` | array | ✓ |
|
||||
| `tabItems[].title` | `BoundValue<string>` | ✓ |
|
||||
| `tabItems[].child` | component-id | ✓ |
|
||||
|
||||
> WinUI: `TabView`, close buttons disabled, no reorder/drag.
|
||||
> Lit: button strip + content region; tracks `selected` index in state.
|
||||
|
||||
### `Modal`
|
||||
Click-to-open dialog.
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `entryPointChild` | component-id | ✓ |
|
||||
| `contentChild` | component-id | ✓ |
|
||||
|
||||
**Behavior**: render `entryPointChild` inline; on user interaction (e.g.,
|
||||
click), open a modal containing `contentChild`. Spec leaves "what closes
|
||||
the modal" open; both impls rely on platform dismissal (Esc, click-out).
|
||||
|
||||
> WinUI: `ContentDialog` triggered by wrapping `entryPointChild` in a
|
||||
> transparent `Button`. Native modal semantics.
|
||||
> Lit: `<dialog>` element + `showModal()`.
|
||||
|
||||
### `Divider`
|
||||
Visual separator.
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `axis` | enum | | `horizontal` (default) \| `vertical` |
|
||||
|
||||
> WinUI: 1px `Rectangle`, `SystemControlForegroundBaseLowBrush`.
|
||||
> Lit: `<hr>`. **Gap**: Lit also exposes `thickness` and `color` in
|
||||
> types but doesn't apply them (root.ts:317 TODO).
|
||||
|
||||
---
|
||||
|
||||
## Display
|
||||
|
||||
### `Text`
|
||||
Text display.
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `text` | `BoundValue<string>` | ✓ |
|
||||
| `usageHint` | enum | | `h1`–`h5`, `caption`, `body` |
|
||||
|
||||
> WinUI: `TextBlock` w/ Fluent theme styles (`TitleLarge`, `Subtitle`,
|
||||
> `BodyStrong`, `Caption`, `Body`). Plain text only.
|
||||
> Lit: **renders Markdown** via `markdown-it`. HTML blocks sandboxed in
|
||||
> `<iframe sandbox="">`; code blocks escaped. This is _beyond_ spec —
|
||||
> see [`grading.md`](./grading.md#text-markdown-divergence) for whether
|
||||
> that's a feature or a foot-gun.
|
||||
|
||||
### `Image`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ | |
|
||||
| `altText` | `BoundValue<string>` | | |
|
||||
| `fit` | enum | | `contain` \| `cover` \| `fill` \| `none` \| `scale-down` |
|
||||
| `usageHint` | enum | | `icon` \| `avatar` \| `smallFeature` \| `mediumFeature` \| `largeFeature` \| `header` |
|
||||
|
||||
> WinUI: `Image`; `usageHint` maps to fixed pixel sizes (24/40/80/160/240/full).
|
||||
> Avatar wraps in `Border` w/ circular `CornerRadius`. SVG via
|
||||
> `SvgImageSource` w/ 8s timeout. URLs gated by `MediaResolver` allowlist
|
||||
> + DNS-rebinding defense.
|
||||
> Lit: `<img>` directly; **no URL filtering** — `data:` and other schemes
|
||||
> pass through.
|
||||
|
||||
### `Icon`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `name` | `BoundValue<string>` | ✓ |
|
||||
|
||||
The 48 supported icon names (canonical enum):
|
||||
|
||||
```
|
||||
accountCircle, add, arrowBack, arrowForward, attachFile, calendarToday,
|
||||
call, camera, check, close, delete, download, edit, event, error,
|
||||
favorite, favoriteOff, folder, help, home, info, locationOn, lock,
|
||||
lockOpen, mail, menu, moreVert, moreHoriz, notificationsOff,
|
||||
notifications, payment, person, phone, photo, print, refresh, search,
|
||||
send, settings, share, shoppingCart, star, starHalf, starOff, upload,
|
||||
visibility, visibilityOff, warning
|
||||
```
|
||||
|
||||
> WinUI: `FontIcon` over Segoe Fluent Icons (MDL2). Unknown names →
|
||||
> `help` glyph; `moreHoriz` reuses `moreVert` (no canonical horizontal
|
||||
> ellipsis in MDL2). Logs once per unmapped name per process.
|
||||
> Lit: CSS background-image sprite; converts camelCase names to snake_case
|
||||
> at lookup (icon.ts:53).
|
||||
|
||||
### `Video`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ |
|
||||
|
||||
> WinUI: `MediaPlayerElement` w/ transport controls. URL gated by
|
||||
> `MediaResolver` HTTPS+allowlist. **No DNS-rebinding pin** — the OS
|
||||
> media stack does its own DNS lookup at playback time, so the
|
||||
> hostname-allowlist is the load-bearing defense (image fetches use a
|
||||
> separate, safer path that does pin).
|
||||
> Lit: `<video controls>`.
|
||||
|
||||
### `AudioPlayer`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `url` | `BoundValue<string>` | ✓ |
|
||||
| `description` | `BoundValue<string>` | | |
|
||||
|
||||
> WinUI: `MediaPlayerElement` w/ `description` rendered above as
|
||||
> `Caption`. URL gated by `MediaResolver` HTTPS+allowlist; same
|
||||
> playback-time DNS caveat as `Video`.
|
||||
> Lit: `<audio controls>`; **`description` is ignored** (audio.ts).
|
||||
|
||||
---
|
||||
|
||||
## Interactive
|
||||
|
||||
### `Button`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `child` | component-id | ✓ |
|
||||
| `action` | `Action` object | ✓ |
|
||||
| `primary` | bool | | |
|
||||
|
||||
`Action` shape:
|
||||
```json
|
||||
{
|
||||
"name": "submit",
|
||||
"context": [
|
||||
{ "key": "email", "value": { "path": "/form/email" } },
|
||||
{ "key": "kind", "value": { "literalString": "primary" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
> WinUI: `Button`; `primary` → `AccentButtonStyle`; click raises
|
||||
> `A2UIAction` through the dispatcher (see
|
||||
> [`data-and-actions.md`](./data-and-actions.md#actions)).
|
||||
> Lit: `<button>`; click dispatches a DOM `CustomEvent`.
|
||||
|
||||
### `CheckBox`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | ✓ |
|
||||
| `value` | `BoundValue<bool>` | ✓ |
|
||||
|
||||
> Both impls: bi-directional binding — toggle writes back to the
|
||||
> `value.path` data-model location. Spec is silent on write-back.
|
||||
|
||||
### `TextField`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | ✓ | |
|
||||
| `text` | `BoundValue<string>` | | |
|
||||
| `textFieldType` | enum | | `shortText` (default) \| `longText` \| `number` \| `date` \| `obscured` |
|
||||
| `validationRegexp` | string | | |
|
||||
|
||||
> WinUI: `TextBox` (or `PasswordBox` if `obscured`); `obscured` paths
|
||||
> auto-marked as secrets. `InputScope` set per type. **`validationRegexp`
|
||||
> not enforced**.
|
||||
> Lit: `<input>` / `<textarea>`. **`validationRegexp` not enforced**
|
||||
> (root.ts:367 TODO).
|
||||
|
||||
### `DateTimeInput`
|
||||
|
||||
| Property | Type | Required | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| `value` | `BoundValue<string>` | ✓ | ISO 8601 |
|
||||
| `enableDate` | bool | | |
|
||||
| `enableTime` | bool | | |
|
||||
|
||||
> WinUI: `CalendarDatePicker` + `TimePicker`; ISO-8601 round-trip.
|
||||
> Lit: `<input type="date|time|datetime-local">`. **`outputFormat`
|
||||
> noted in code but ignored** (datetime-input.ts:159 TODO).
|
||||
|
||||
### `MultipleChoice`
|
||||
|
||||
| Property | Type | Required | Enum |
|
||||
| --- | --- | --- | --- |
|
||||
| `selections` | `BoundValue<array>` (or `path`) | ✓ | |
|
||||
| `options[]` | array | ✓ | each: `{ label: BoundValue<string>, value: string }` |
|
||||
| `maxAllowedSelections` | integer | | |
|
||||
| `variant` | enum | | `checkbox` \| `chips` |
|
||||
| `filterable` | bool | | |
|
||||
|
||||
> WinUI: `ComboBox` (single) or `ListView` (multi). When
|
||||
> `maxAllowedSelections == 1` it writes a scalar to the path (not a
|
||||
> 1-element array) — back-compat reads tolerate either. **`variant` and
|
||||
> `filterable` not honored**.
|
||||
> Lit: `<select multiple>`. **`maxAllowedSelections` not enforced**
|
||||
> (root.ts:334 TODO); selections array resolution incomplete
|
||||
> (multiple-choice.ts:87–103).
|
||||
|
||||
### `Slider`
|
||||
|
||||
| Property | Type | Required |
|
||||
| --- | --- | --- |
|
||||
| `label` | `BoundValue<string>` | | |
|
||||
| `value` | `BoundValue<number>` | ✓ |
|
||||
| `minValue` | number | | |
|
||||
| `maxValue` | number | | |
|
||||
|
||||
> WinUI: `Slider`, defaults min=0/max=100/step=1. Bi-directional bind.
|
||||
> Lit: `<input type="range">`. Bi-directional bind.
|
||||
|
||||
---
|
||||
|
||||
## Catalog-strict mode
|
||||
|
||||
Both implementations must reject **anything not in the 18 above** by
|
||||
rendering a placeholder, not by throwing. This is one of the few
|
||||
"MUST" requirements in the spec:
|
||||
|
||||
> The full set of available component types and their properties is
|
||||
> defined by a Catalog Schema, not in the core protocol schema.
|
||||
|
||||
> WinUI: `UnknownRenderer` — orange-bordered placeholder w/ warning
|
||||
> icon and component name. Telemetry event fired.
|
||||
> Lit: walks a `componentRegistry`; allows custom components when
|
||||
> `enableCustomElements` flag is set (extension beyond spec).
|
||||
|
||||
## Catalog-level styles (theme tokens)
|
||||
|
||||
Each catalog optionally declares `styles`:
|
||||
|
||||
| Token | Type |
|
||||
| --- | --- |
|
||||
| `font` | string (font family) |
|
||||
| `primaryColor` | hex `#RRGGBB` |
|
||||
|
||||
> WinUI: `A2UITheme.Parse()` reads these plus nested
|
||||
> `colors.{accent,background,foreground,card}`,
|
||||
> `typography.fontFamily`, `radius`, `spacing`. Applied to the surface
|
||||
> Grid resource scope (not global).
|
||||
> Lit: derives a `--p-0` … `--p-100` palette via CSS `color-mix` from
|
||||
> `primaryColor`.
|
||||
@ -1,190 +0,0 @@
|
||||
# A2UI v0.8 — Data Binding & Actions
|
||||
|
||||
## A2UIValue
|
||||
|
||||
Almost every property on a component is an `A2UIValue` — a tagged union of
|
||||
literal types and a path into the data model.
|
||||
|
||||
```jsonc
|
||||
// All of these are valid:
|
||||
{ "literalString": "Hello" }
|
||||
{ "literalNumber": 42 }
|
||||
{ "literalBoolean": true }
|
||||
{ "literalArray": ["a", "b", "c"] } // array-of-string only
|
||||
{ "path": "/user/name" } // bind to data-model location
|
||||
|
||||
// "Implicit initialization" (literal + path together):
|
||||
{ "literalString": "default", "path": "/form/title" }
|
||||
// → on first resolve, the client writes "default" to /form/title,
|
||||
// then binds. After that it's a path binding.
|
||||
```
|
||||
|
||||
The spec does **not** enumerate `literalArray<number>` or `literalArray<bool>`
|
||||
— string arrays are the only explicit array literal in v0.8.
|
||||
|
||||
### Resolution at runtime
|
||||
|
||||
When a component renders or re-renders, each `A2UIValue` property is
|
||||
resolved:
|
||||
|
||||
1. If a literal is present → use it. (Casting is impl-defined; both
|
||||
impls coerce numbers ↔ strings as needed for display.)
|
||||
2. Else if `path` is present → look up the value in the surface's data
|
||||
model and use it.
|
||||
3. Else → property is "unset" (component decides default behavior).
|
||||
|
||||
### Path syntax
|
||||
|
||||
Paths are JSON-pointer-_ish_ strings (`/foo/bar/0`). The spec doesn't
|
||||
formally cite RFC 6901; both impls treat them similarly but differ at
|
||||
edges:
|
||||
|
||||
- **WinUI**: strict RFC 6901 via `DataModelStore.SetByPointer` /
|
||||
`Read` (`src/OpenClaw.Tray.WinUI/A2UI/DataModel/DataModelStore.cs`).
|
||||
- **Lit**: relative paths supported (`.` = current `dataContextPath`,
|
||||
bare names resolve relative to context); auto-parses `valueString`
|
||||
fields that look like JSON (`vendor/a2ui/.../model-processor.ts:198–225`).
|
||||
This is convenient but can be surprising — a string `"[1,2]"` becomes
|
||||
an array.
|
||||
|
||||
## Data model
|
||||
|
||||
A surface's data model is a JSON tree. `dataModelUpdate` envelopes patch
|
||||
into this tree:
|
||||
|
||||
```jsonc
|
||||
{ "dataModelUpdate": {
|
||||
"surfaceId": "main",
|
||||
"path": "/user",
|
||||
"contents": [
|
||||
{ "key": "name", "valueString": "Ada" },
|
||||
{ "key": "age", "valueNumber": 36 },
|
||||
{ "key": "tags", "valueArray": [
|
||||
{ "valueString": "admin" }, { "valueString": "beta" }
|
||||
]},
|
||||
{ "key": "address","valueMap": [
|
||||
{ "key": "city", "valueString": "London" }
|
||||
]}
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
Behaviors **not nailed down by the spec** that matter in practice:
|
||||
|
||||
| Question | Lit | WinUI |
|
||||
| --- | --- | --- |
|
||||
| Replace vs. merge `valueMap`? | Merge per leaf | Merge per leaf (RFC 6901 set) |
|
||||
| Notification granularity? | Coalesced via Lit signals | Coalesced via subscription set |
|
||||
| Per-update size caps? | None | 1024 entries / update; 256-char keys; 64 KiB strings; 32-deep maps |
|
||||
|
||||
### Subscriptions
|
||||
|
||||
Components watch the model so they can re-render when the agent or another
|
||||
component writes:
|
||||
|
||||
- **Lit**: `@lit-labs/signals`; the root applies an `effect()` to the
|
||||
`childComponents` signal so the light-DOM tree re-renders when the
|
||||
signal fires (`vendor/a2ui/.../ui/root.ts:39, 85`).
|
||||
- **WinUI**: `DataModelObservable.Subscribe(path, callback)` returns
|
||||
`IDisposable`; renderers call `ctx.WatchValue(componentId, name, value, callback)`
|
||||
which installs a per-component subscription that's torn down when the
|
||||
component is recycled (`src/OpenClaw.Tray.WinUI/A2UI/Rendering/IComponentRenderer.cs`).
|
||||
|
||||
## Actions
|
||||
|
||||
A `Button.action` (and other action-bearing properties) declares
|
||||
**what to send to the agent**:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"name": "submit",
|
||||
"context": [
|
||||
{ "key": "email", "value": { "path": "/form/email" } },
|
||||
{ "key": "kind", "value": { "literalString": "primary" } }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
When the user clicks, the client must:
|
||||
|
||||
1. Resolve every `context[].value` against the data model right now.
|
||||
2. Build a `userAction` event:
|
||||
```jsonc
|
||||
{ "userAction": {
|
||||
"name": "submit",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn-1",
|
||||
"timestamp": "2026-04-27T17:05:00Z",
|
||||
"context": { "email": "ada@example.com", "kind": "primary" }
|
||||
}}
|
||||
```
|
||||
3. Send it back via A2A (the spec is explicit: **not** on the SSE/JSONL
|
||||
stream).
|
||||
|
||||
### What "context" should and shouldn't contain
|
||||
|
||||
The spec is silent on **scoping** — i.e., is it OK for a Button to
|
||||
declare `context: [{ key: "all", value: { path: "/" } }]` and exfiltrate
|
||||
the entire data model?
|
||||
|
||||
The two impls take very different positions here:
|
||||
|
||||
- **Lit**: passes `action` and `dataContextPath` straight through in a
|
||||
DOM `CustomEvent`. The host (canvas) is responsible for resolving and
|
||||
sanitizing — there's no defense at the renderer.
|
||||
- **WinUI**: `RenderContext.BuildActionContext()` (`IComponentRenderer.cs:183–249`)
|
||||
collects an **allowed-paths set** from either:
|
||||
- explicit `dataBinding: [ { path: "..." } ]` on the component, or
|
||||
- implicit walk over component properties' own `A2UIValue.path` values.
|
||||
|
||||
Each declared `context[].value.path` is then `IsAllowedPath`-filtered (exact
|
||||
match or ancestor with `/` boundary). Secret paths (registered or
|
||||
denylisted by substring) are excluded unless explicitly allowed.
|
||||
|
||||
This is one of the most consequential **good deviations** in the WinUI
|
||||
impl — see [`grading.md#security-deviations`](./grading.md#security-deviations).
|
||||
|
||||
### Transport
|
||||
|
||||
After context is built, both impls hand off to a transport:
|
||||
|
||||
- **Lit**: dispatches `StateEvent<"a2ui.action">` (CustomEvent, bubbling,
|
||||
composed). Listener wires up however the embedding app wants.
|
||||
- **WinUI**: `ActionDispatcher` (`src/OpenClaw.Tray.WinUI/A2UI/Actions/IActionSink.cs`):
|
||||
- **Debounces** by `surfaceId|sourceComponentId|name` (200 ms window).
|
||||
- **Single-flight gate** so a fallback dequeue can't race a fresh send.
|
||||
- **Fallback queue** when no transport is connected.
|
||||
- Tries each registered transport (`GatewayActionTransport`,
|
||||
`LoggingActionTransport`) until one delivers.
|
||||
|
||||
For the gateway path, `GatewayActionTransport`
|
||||
(`src/OpenClaw.Tray.WinUI/A2UI/Actions/GatewayActionTransport.cs`) emits
|
||||
an `agent.request` node event:
|
||||
|
||||
```jsonc
|
||||
{
|
||||
"message": "CANVAS_A2UI action=submit session=main surface=main component=btn-1 host=… instance=… ctx=… default=update_canvas",
|
||||
"sessionKey": "main",
|
||||
"thinking": "low",
|
||||
"deliver": false,
|
||||
"key": "<action-id>"
|
||||
}
|
||||
```
|
||||
|
||||
`AgentMessageFormatter` is a deliberate byte-for-byte port of the Android
|
||||
node's formatter — the gateway parses tags identically across platforms.
|
||||
|
||||
## Security boundaries
|
||||
|
||||
| Concern | Spec | Lit | WinUI |
|
||||
| --- | --- | --- | --- |
|
||||
| URL fetching for `Image`/`Video`/`AudioPlayer` | silent | unrestricted | HTTPS+allowlist for all three; DNS-rebinding pin only on `Image` fetches (`MediaResolver.cs`'s `SocketsHttpHandler.ConnectCallback`). `Video`/`AudioPlayer` hand the validated URI to `MediaSource.CreateFromUri`, which performs its own DNS at playback — allowlist is the load-bearing defense for media. |
|
||||
| Unknown component types | "render placeholder, don't crash" | placeholder for spec'd missing; **registers user-supplied custom elements** if a flag is set | strict 18-only `UnknownRenderer` placeholder |
|
||||
| Markdown / HTML in `Text` | spec says plain string | parses Markdown; HTML blocks rendered in `iframe sandbox=""`; code escaped | renders as plain string |
|
||||
| Action context leakage | underspecified | passthrough — host's problem | server allowlist + secret denylist |
|
||||
| Bearer / token surfaces | n/a | n/a | MCP token shown in Settings UI w/ copy button (out-of-band) |
|
||||
| `canvas.navigate` | n/a (out of A2UI) | n/a | `HttpUrlValidator` gates URLs; user choice of "canvas" vs "browser" opener |
|
||||
|
||||
The rows where the spec is silent or underspecified are the spots where a reviewer should keep
|
||||
their guard up — anything Lit forwards to the embedding host can become
|
||||
a vulnerability if that host doesn't apply policy.
|
||||
@ -1,355 +0,0 @@
|
||||
# A2UI v0.8 — Implementation Grading
|
||||
|
||||
This grades two implementations against the v0.8 spec
|
||||
(<https://a2ui.org/specification/v0.8-a2ui/>):
|
||||
|
||||
- **Lit reference** at `openclaw\vendor\a2ui\renderers\lit\src\0.8` (relative to the OpenClaw checkout)
|
||||
- **Native WinUI** in this repo at `src/OpenClaw.Tray.WinUI/A2UI/`
|
||||
|
||||
The Lit code looks like the canonical browser renderer the OpenClaw
|
||||
canvas host ships; the WinUI code is this repo's branch
|
||||
`feat/a2ui-native-winui`.
|
||||
|
||||
Citations use repo-local paths. Lit paths are anchored at the OpenClaw
|
||||
checkout: `openclaw\vendor\a2ui\renderers\lit\src\0.8\`. WinUI paths are
|
||||
anchored at `src/OpenClaw.Tray.WinUI/A2UI/`.
|
||||
|
||||
## Method
|
||||
|
||||
For each spec area, deductions land in two buckets:
|
||||
|
||||
- **Gap** — implementation is missing or wrong vs. spec. Letter grade penalty.
|
||||
- **Good deviation** — implementation does something the spec _doesn't say
|
||||
to do_, but it's the correct call. Listed but doesn't penalize.
|
||||
|
||||
Grades are A–F, separately for Lit and WinUI. There is no curving —
|
||||
"A" means it would pass a strict spec audit and a strict security
|
||||
audit; "B" means it works for normal traffic but fails under a hostile
|
||||
agent; etc.
|
||||
|
||||
---
|
||||
|
||||
## Scorecard
|
||||
|
||||
| Area | Lit | WinUI | Notes |
|
||||
| --- | --- | --- | --- |
|
||||
| Component coverage (catalog completeness) | A | A | both 18/18 |
|
||||
| Component property completeness | B | A− | Lit has 4 documented TODOs; WinUI has minor `Row.distribution` mapping gaps |
|
||||
| Streaming / JSONL parsing | B | A | Lit: lenient; WinUI: lenient + size caps |
|
||||
| Data binding / `A2UIValue` | B+ | A | Lit auto-parses JSON strings (surprising); WinUI strict RFC 6901 |
|
||||
| Action transport | B | A | Lit: DOM event passthrough; WinUI: debounced + single-flight + fallback queue + gateway tag protocol |
|
||||
| Action context security | D | A | Lit punts to host; WinUI scopes to declared `dataBinding` and redacts secrets |
|
||||
| Theming | A− | A− | Equivalent power; different idioms |
|
||||
| URL safety / SSRF | F | A− | Lit unrestricted; WinUI HTTPS+allowlist for `Image`/`Video`/`AudioPlayer`, plus DNS-rebinding pin on `Image` fetches only |
|
||||
| Modal lifecycle | A− | A | Both work; WinUI uses native `ContentDialog` |
|
||||
| List virtualization | C | A | Lit builds all items; WinUI uses `ItemsRepeater` w/ recycling |
|
||||
| Bi-directional binding (write-back) | A | A | Both implement; spec is silent (good deviation) |
|
||||
| Markdown in `Text` | B+ | n/a | Lit's enhancement is real but increases attack surface |
|
||||
| Test coverage | D | A− | Lit: 1 model test, no per-component; WinUI: render matrix + scale + integration |
|
||||
| Spec deviations called out (good ones) | B | A | Lit's improvements partially offset its gaps |
|
||||
| **Overall** | **B−** | **A−** | |
|
||||
|
||||
The two overall grades have very different shapes:
|
||||
|
||||
- **Lit** is a smaller codebase that gets the happy path right, with two
|
||||
notable **good** deviations (Markdown rendering, bi-directional binding)
|
||||
but several papercut **gaps** and a **non-trivial security delta**
|
||||
inherited from a "the host will sanitize" posture.
|
||||
- **WinUI** is significantly more code, fills almost every gap, and adds
|
||||
defenses the spec doesn't ask for. Its remaining minus comes from the
|
||||
things it _doesn't_ do yet (List `template` mode, Row wrap, `MultipleChoice.variant`).
|
||||
|
||||
---
|
||||
|
||||
## Lit implementation — detailed deductions
|
||||
|
||||
### Documented `TODO` gaps
|
||||
|
||||
Verbatim TODOs in `vendor/a2ui/.../ui/root.ts` and component files:
|
||||
|
||||
| Property | File:Line | Status |
|
||||
| --- | --- | --- |
|
||||
| `Divider.thickness` / `axis` / `color` | `ui/root.ts:317` | type declared, value not applied to `<hr>` |
|
||||
| `MultipleChoice.maxAllowedSelections` | `ui/root.ts:334` | accepted but not enforced |
|
||||
| `TextField.validationRegexp` | `ui/root.ts:367` | not applied to `<input>` |
|
||||
| `DateTimeInput.outputFormat` | `ui/datetime-input.ts:159` | placeholder; always uses browser format |
|
||||
| `MultipleChoice.selections` resolution | `ui/multiple-choice.ts:87–103` | logic incomplete when `selections` is path-bound |
|
||||
| `AudioPlayer.description` | `ui/audio.ts` | spec'd property silently dropped |
|
||||
|
||||
Letter penalty: **−1 step on Component Property Completeness** (A → B).
|
||||
|
||||
### `A2UIValue.path` resolver auto-parses JSON-shaped strings
|
||||
|
||||
`data/model-processor.ts:198–225` detects `valueString` payloads that look
|
||||
like `{...}` or `[...]` and **silently parses them as JSON**. The intent is
|
||||
"developer convenience"; the consequence is that a string literal containing
|
||||
a `[` or `{` becomes a structured value. This is a **gap** because the spec
|
||||
distinguishes `valueString` from `valueArray`/`valueMap` precisely so the
|
||||
agent can be unambiguous. Letter penalty: **−1 step on data binding**.
|
||||
|
||||
### URLs are passed through to the DOM
|
||||
|
||||
`ui/image.ts:67–74` binds `<img src="${url}">` directly. There is no
|
||||
allowlist for `data:` / `javascript:` / `file:` / private-IP hosts, no
|
||||
SSRF protection, no DNS rebinding defense. The WinUI impl has all of
|
||||
these. The host **may** sanitize before forwarding URLs, but the
|
||||
renderer offers no defense in depth. Letter penalty: **−2 steps on URL
|
||||
safety** (this is the F).
|
||||
|
||||
### Component registry allows arbitrary custom elements
|
||||
|
||||
`vendor/a2ui/.../ui/root.ts:118–140, 441–471` lets the embedding app set
|
||||
`enableCustomElements = true` and then renders any `<component>` whose tag
|
||||
is registered in `componentRegistry`. This is **beyond spec** — useful for
|
||||
extensibility, dangerous for catalog-strict mode. **Not graded as a gap**
|
||||
since it's behind a flag, but it's worth flagging at the host level.
|
||||
|
||||
### One unit test covers everything
|
||||
|
||||
`vendor/a2ui/.../model.test.ts` exercises `A2uiMessageProcessor` for
|
||||
`beginRendering` and `surfaceUpdate`. There are **no per-component
|
||||
render tests, no event-dispatch tests, no markdown sanitizer tests, no
|
||||
data-binding edge-case tests**. Letter penalty: **−2 steps on test
|
||||
coverage** (D).
|
||||
|
||||
### Good deviations
|
||||
|
||||
- **Markdown rendering** in `Text` (`ui/text.ts`, `ui/directives/markdown.ts`).
|
||||
HTML blocks wrapped in `<iframe sandbox="">`; code blocks escaped via
|
||||
`sanitizer.escapeNodeText`. The spec says plain string. Whether this
|
||||
counts as good depends on the threat model — see
|
||||
[the Text/Markdown divergence](#text-markdown-divergence).
|
||||
- **Signal-driven re-render** via `@lit-labs/signals`. Cleaner reactivity
|
||||
than naive `requestUpdate()`.
|
||||
- **Bi-directional binding** in `CheckBox`, `TextField`, `Slider`,
|
||||
`DateTimeInput`. Spec is silent on write-back; both impls add it.
|
||||
|
||||
---
|
||||
|
||||
## WinUI implementation — detailed deductions
|
||||
|
||||
### Property-coverage misses
|
||||
|
||||
| Property | File:Line | Status |
|
||||
| --- | --- | --- |
|
||||
| `Row.distribution` `spaceBetween/Around/Evenly` | `Rendering/Renderers/ContainerRenderers.cs:10–32` | all three collapse to `HorizontalAlignment.Stretch` (WinUI `StackPanel` doesn't natively express justify-content) |
|
||||
| `Row.wrap` (multi-row) | n/a | not implemented; would need a custom `Panel` |
|
||||
| `List.template` mode | `Rendering/Renderers/ContainerRenderers.cs:57–159` | only `explicitList` supported |
|
||||
| `MultipleChoice.variant` (`chips`) | `Rendering/Renderers/InteractiveRenderers.cs:279–430` | always `ComboBox`/`ListView` |
|
||||
| `MultipleChoice.filterable` | same | not honored |
|
||||
| `TextField.validationRegexp` | `Rendering/Renderers/InteractiveRenderers.cs:98–199` | not enforced |
|
||||
| `Tabs` close / reorder | `Rendering/Renderers/ContainerRenderers.cs:187–235` | disabled |
|
||||
| Component `weight` | `Protocol/A2UIProtocol.cs:111–151` | parsed but not applied |
|
||||
|
||||
Letter penalty: **−1 step on Component Property Completeness**, but
|
||||
balanced by being the only impl that fills the corresponding Lit gaps
|
||||
(`maxAllowedSelections` is enforced; `Divider.axis` is honored).
|
||||
|
||||
### Action context scoping (the centerpiece win)
|
||||
|
||||
`Rendering/IComponentRenderer.cs:183–249` (`BuildActionContext`):
|
||||
|
||||
1. Collect `allowed` paths from the component's explicit `dataBinding`
|
||||
array, or — if absent — implicitly walk every `A2UIValue.path` referenced
|
||||
by the component's own properties.
|
||||
2. For each `action.context[]` entry, resolve only if `IsAllowedPath`
|
||||
matches (exact or ancestor with `/` boundary).
|
||||
3. Strip secret paths via `SecretRedactor` (`Rendering/SecretRedactor.cs`):
|
||||
- Registered paths (e.g., obscured `TextField` fields).
|
||||
- Substring denylist: `password`, `secret`, `token`.
|
||||
|
||||
This blocks the trivial "exfiltrate the whole tree" attack without
|
||||
requiring the host to know about A2UI internals. The Lit impl can't
|
||||
do this because it dispatches `action` straight through.
|
||||
|
||||
### URL safety — DNS rebinding defense (Image fetches)
|
||||
|
||||
`Rendering/MediaResolver.cs:57–95`:
|
||||
|
||||
```csharp
|
||||
new SocketsHttpHandler {
|
||||
ConnectCallback = async (ctx, ct) => {
|
||||
var addresses = await Dns.GetHostAddressesAsync(ctx.DnsEndPoint.Host, ct);
|
||||
foreach (var ip in addresses) {
|
||||
if (!IsPublicAddress(ip)) throw ...; // loopback, RFC1918, link-local, multicast
|
||||
}
|
||||
// connect to resolved IP, not hostname (no second DNS lookup)
|
||||
},
|
||||
PooledConnectionLifetime = TimeSpan.FromMinutes(2),
|
||||
};
|
||||
```
|
||||
|
||||
Plus an allowlist gate in `IsAllowed(url)`. Closes a TOCTOU window
|
||||
between an allowlist check and the actual TCP connect. The Lit impl
|
||||
does none of this.
|
||||
|
||||
**Limitation: this pin is image-only.** `Video`/`AudioPlayer` route through
|
||||
`MediaSource.CreateFromUri`, which performs its own DNS resolution at
|
||||
playback time outside the resolver. The HTTPS+allowlist gate still
|
||||
applies to those URLs, but the connect-time IP check does not — see
|
||||
`MediaResolver.TryResolveMediaUri`. A local-proxy approach was scoped
|
||||
out of the v0.8 native renderer; the allowlist is the load-bearing
|
||||
defense for media playback.
|
||||
|
||||
### Streaming hardening
|
||||
|
||||
`Protocol/A2UIProtocol.cs:176–367` and `Hosting/A2UIRouter.cs`:
|
||||
|
||||
| Cap | Value |
|
||||
| --- | --- |
|
||||
| Max line length | 1 MiB |
|
||||
| Max components per surface | 2000 |
|
||||
| Max entries per `dataModelUpdate` | 1024 |
|
||||
| Max key length | 256 |
|
||||
| Max string value | 64 KiB |
|
||||
| Max `valueMap` depth | 32 |
|
||||
| Max render depth | 64 |
|
||||
|
||||
All limits log + drop, never throw. Cycle detection in `_renderingIds`
|
||||
prevents id-loops in malformed surfaces.
|
||||
|
||||
### Component diff on `surfaceUpdate`
|
||||
|
||||
`Hosting/SurfaceHost.cs:ApplyComponents` compares incoming defs (name,
|
||||
weight, properties JSON-string) against the previous set and **skips
|
||||
rebuild if unchanged**. Effect: a re-emitted surface preserves
|
||||
`TextBox` caret position, scroll offset, and `Tabs` selection. The
|
||||
spec calls for "structural diffing"; this is a heuristic that catches
|
||||
the most common case (agent re-emits whole surface).
|
||||
|
||||
### Modal as native `ContentDialog`
|
||||
|
||||
`Rendering/Renderers/ContainerRenderers.cs:237–284` wires up a
|
||||
`ContentDialog` whose `Content` is the `contentChild` and whose trigger
|
||||
is the `entryPointChild` wrapped in a transparent `Button`. Spec leaves
|
||||
the modal _shape_ open; the WinUI impl gives it the full platform-modal
|
||||
treatment (focus trap, ESC dismiss, screen-reader announcement).
|
||||
|
||||
### List virtualization
|
||||
|
||||
`Rendering/Renderers/ContainerRenderers.cs:57–159` uses an
|
||||
`ItemsRepeater` with a `ChildIdTemplate` cache keyed by component id.
|
||||
Recycled elements are pulled from the cache so their data-binding
|
||||
subscriptions stay alive across scrolling. The Lit impl has no
|
||||
virtualization.
|
||||
|
||||
### Test surface
|
||||
|
||||
| Project | Focus |
|
||||
| --- | --- |
|
||||
| `OpenClaw.Shared.Tests/A2UICapabilitySecurityTests.cs` | protocol, secret redaction |
|
||||
| `OpenClaw.Tray.UITests/A2UIRenderingTests.cs` | per-component XAML rendering, data binding, live updates |
|
||||
| `OpenClaw.Tray.UITests/A2UIControlMatrixTests.cs` | property matrix coverage |
|
||||
| `OpenClaw.Tray.UITests/A2UIDashboardScaleTest.cs` | 1000+ component stress |
|
||||
| `OpenClaw.Tray.UITests/A2UIThemeTests.cs` | theme parsing |
|
||||
| `OpenClaw.Tray.UITests/A2UISvgTests.cs` | SVG decode + 8s timeout |
|
||||
| `OpenClaw.Tray.IntegrationTests/A2UICanvasIntegrationTests.cs` | end-to-end MCP smoke + PNG capture |
|
||||
|
||||
Coverage is merged across all three suites via `dotnet-coverage` (see the
|
||||
CI workflow, `.github/workflows/ci.yml`). Letter grade A−; the missing step is that the
|
||||
gateway-action transport unit tests aren't fully isolated (depend on a
|
||||
fake `WindowsNodeClient`).
|
||||
|
||||
### Good deviations
|
||||
|
||||
| Deviation | File | Why it's good |
|
||||
| --- | --- | --- |
|
||||
| DNS rebinding defense (image fetches) | `Rendering/MediaResolver.cs:57–95` | spec doesn't ask but a hostile agent can otherwise pivot through the image fetch path to internal HTTP services. Does not extend to `Video`/`AudioPlayer` — see "URL safety" section. |
|
||||
| Action context allowlist | `Rendering/IComponentRenderer.cs:183–249` | minimum-information principle; spec leaves this open |
|
||||
| Secret denylist | `Rendering/SecretRedactor.cs` | catches `/auth/sessionToken` style names automatically |
|
||||
| `surfaceUpdate` diff | `Hosting/SurfaceHost.cs` | preserves caret/scroll/selection on re-emit |
|
||||
| Single-flight gate on action dispatch | `Actions/IActionSink.cs:27–142` | prevents fallback dequeue racing fresh send |
|
||||
| Per-surface theme scope | `Hosting/SurfaceHost.cs:ApplyThemeToScope` | multi-surface tab views don't bleed themes |
|
||||
| `IA2UITelemetry` seam | `Telemetry/IA2UITelemetry.cs` | structured events instead of log scraping |
|
||||
| Single-handler `Func` events on `CanvasCapability` | reviewed in commit `5b9c468` | catches accidental multi-subscribe instead of silent `Delegate.Combine` |
|
||||
| MCP bearer token in Settings UI | `SettingsPage.xaml.cs` | quality-of-life for MCP setup, kept out of action payloads |
|
||||
|
||||
---
|
||||
|
||||
## Side-by-side: where they diverge meaningfully
|
||||
|
||||
### `Text` / Markdown divergence
|
||||
|
||||
The Lit impl renders Markdown; the WinUI impl renders plain text. This is
|
||||
the **biggest functional UX difference** between the two.
|
||||
|
||||
Lit's defense is `iframe sandbox=""` for HTML blocks plus
|
||||
`escapeNodeText` for code. That's a reasonable sandbox model in the
|
||||
browser — but every line still expands the renderer's attack surface
|
||||
beyond the spec's "plain string" promise.
|
||||
|
||||
For ms-windows-node, parity is **probably not worth chasing** unless
|
||||
the agent surfaces depend on it: WinUI doesn't have a built-in
|
||||
Markdown engine, and adding one means importing a dependency that has
|
||||
to be kept in lockstep with Lit's rendering choices to avoid surfaces
|
||||
that look right in the browser and broken on Windows. The defensible
|
||||
choice is to ask the agent to emit explicit `Text + usageHint`
|
||||
hierarchies instead of inline Markdown.
|
||||
|
||||
### List performance
|
||||
|
||||
If a surface includes a `List` of 200+ items, the Lit renderer will
|
||||
build all 200 children before paint. WinUI builds ~10 (whatever fits
|
||||
the viewport) and recycles as the user scrolls. For this repo's
|
||||
typical agent surfaces (dashboards, conversation panels) this is the
|
||||
single biggest performance delta.
|
||||
|
||||
### Action security model
|
||||
|
||||
The two impls have completely different threat models:
|
||||
|
||||
- **Lit + browser canvas host**: assume the embedding app is
|
||||
trustworthy and will sanitize. The renderer is a thin presenter.
|
||||
- **WinUI tray**: assume the renderer talks to a hostile agent over an
|
||||
arbitrary network. Apply policy in the renderer.
|
||||
|
||||
Neither is wrong, but a host that embeds Lit and wants WinUI-grade isolation has to
|
||||
build the same allowlist/denylist logic that WinUI bakes in. In
|
||||
practice that means anyone embedding the Lit renderer outside
|
||||
OpenClaw's canvas host needs to **wrap action handlers**, never just
|
||||
forward them.
|
||||
|
||||
---
|
||||
|
||||
## Known deviations by category
|
||||
|
||||
For PR reviewers — quick "is this OK?" reference.
|
||||
|
||||
| Deviation | Spec status | Lit | WinUI | Verdict |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| Bi-directional data-model write on user input | silent | ✓ | ✓ | Good — spec assumes it implicitly |
|
||||
| Markdown in `Text` | violation (plain string) | ✓ | ✗ | Lit: useful but expands attack surface; WinUI: stay plain |
|
||||
| Custom-element registry beyond catalog | violation (catalog-strict) | ✓ (flag) | ✗ | Risk; only enable in trusted hosts |
|
||||
| `valueString` auto-parsed as JSON | violation (type erasure) | ✓ | ✗ | Bug-shaped; rely on `valueMap`/`valueArray` |
|
||||
| Hard size caps on stream / model | silent | ✗ | ✓ | Good — DoS defense |
|
||||
| URL allowlist on media | silent | ✗ | ✓ | Good — SSRF defense |
|
||||
| DNS-rebinding defense (image fetches) | silent | ✗ | ✓ | Good — beyond allowlist. Image only; `Video`/`AudioPlayer` rely on the allowlist alone (OS media stack re-resolves at playback). |
|
||||
| Action context allowlist | silent | ✗ | ✓ | Good — minimum information |
|
||||
| Secret-path redaction | silent | ✗ | ✓ | Good — keeps tokens off the wire |
|
||||
| Component diff on `surfaceUpdate` | "structural diffing" (vague) | ✗ | ✓ | Good — preserves UI state |
|
||||
| `List` virtualization | "should virtualize" | ✗ | ✓ | Good — required for non-trivial surfaces |
|
||||
| `Modal` as native `ContentDialog` | shape open | `<dialog>` | `ContentDialog` | Both fine |
|
||||
| `MultipleChoice` single-mode writes scalar | spec implies array | array | scalar | WinUI reads tolerate either shape; confirm which one your agent emits and expects |
|
||||
| `validationRegexp` (TextField) | spec property | ✗ TODO | ✗ | Both have a gap here |
|
||||
|
||||
---
|
||||
|
||||
## Recommended follow-ups (not part of grading)
|
||||
|
||||
These are the changes that would close the remaining minuses:
|
||||
|
||||
**WinUI (A− → A)**
|
||||
- Honor `MultipleChoice.variant` (`chips`) and `filterable`.
|
||||
- Apply `TextField.validationRegexp` (the catalog says it's a string;
|
||||
compile + on-change validate).
|
||||
- Consider `List.template` mode for surfaces that bind a list to a
|
||||
data-model array (also unblocks v0.9 readiness).
|
||||
- Add unit tests for `GatewayActionTransport` payload shape.
|
||||
|
||||
**Lit (B− → B+ or higher)**
|
||||
- Resolve the four documented `TODO`s (Divider, TextField,
|
||||
DateTimeInput, MultipleChoice).
|
||||
- Add per-component render tests and a markdown-sanitizer test suite.
|
||||
- Add at least an opt-in URL allowlist for media components.
|
||||
- Document the `enableCustomElements` flag's risk surface for
|
||||
embedding apps.
|
||||
@ -1,173 +0,0 @@
|
||||
# A2UI v0.8 — Protocol
|
||||
|
||||
This is a faithful summary of the v0.8 wire format, distilled from
|
||||
<https://a2ui.org/specification/v0.8-a2ui/> and
|
||||
<https://a2ui.org/specification/v0.8-a2a-extension/>.
|
||||
|
||||
## 1. Architecture
|
||||
|
||||
A2UI is a **streaming, declarative UI protocol** for LLM-generated
|
||||
interfaces:
|
||||
|
||||
- **Server → client**: a JSONL stream (typically over SSE, but the protocol
|
||||
is transport-agnostic) carrying UI updates.
|
||||
- **Client → server**: A2A messages reporting user events.
|
||||
- **Surfaces**: independently-controllable UI regions, addressed by
|
||||
`surfaceId`. A single agent stream can manage many surfaces in parallel.
|
||||
|
||||
The component model is an **adjacency list** — a flat dictionary of
|
||||
`id → component`, with parents referencing children by id. This is easier
|
||||
for an LLM to emit incrementally than nested trees and is the foundation of
|
||||
progressive rendering.
|
||||
|
||||
## 2. Server → client envelopes
|
||||
|
||||
Each JSONL line is a JSON object containing **exactly one** of these keys:
|
||||
|
||||
| Key | Purpose |
|
||||
| --- | --- |
|
||||
| `surfaceUpdate` | Add or replace components in a surface's adjacency list |
|
||||
| `dataModelUpdate` | Mutate the surface's data model |
|
||||
| `beginRendering` | Signal "ready to render"; specify `root` and chosen catalog |
|
||||
| `deleteSurface` | Tear down a surface |
|
||||
|
||||
### 2.1 `surfaceUpdate`
|
||||
|
||||
```json
|
||||
{ "surfaceUpdate": {
|
||||
"surfaceId": "main",
|
||||
"components": [
|
||||
{ "id": "btn-1",
|
||||
"component": { "Button": { "child": "lbl-1", "action": { ... } } } }
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
Each entry has `id`, exactly one `component.{TypeName}` object, and an
|
||||
optional `weight` (used when the parent applies weighted distribution; not
|
||||
all parents honor it). The component definition is **catalog-validated**:
|
||||
unknown types fall back to a placeholder (clients MUST NOT crash on unknown
|
||||
types).
|
||||
|
||||
### 2.2 `dataModelUpdate`
|
||||
|
||||
```json
|
||||
{ "dataModelUpdate": {
|
||||
"surfaceId": "main",
|
||||
"path": "/optional/base",
|
||||
"contents": [
|
||||
{ "key": "name", "valueString": "Ada" },
|
||||
{ "key": "age", "valueNumber": 36 },
|
||||
{ "key": "active", "valueBoolean": true },
|
||||
{ "key": "address","valueMap": [ { "key": "city", "valueString": "London" } ] }
|
||||
]
|
||||
}}
|
||||
```
|
||||
|
||||
The `contents` array is a **typed key-value list** — `valueString`,
|
||||
`valueNumber`, `valueBoolean`, `valueMap`, `valueArray`. Updates are merged
|
||||
into the surface's data model rooted at `path` (default `/`). The spec
|
||||
leaves "merge vs replace" semantics underspecified; in practice both
|
||||
reference clients overwrite leaves and recurse into maps.
|
||||
|
||||
A special idiom — `path: "/x", contents: [{ "key": ".", "valueString": "v" }]`
|
||||
— is used to set a primitive at a non-root path.
|
||||
|
||||
### 2.3 `beginRendering`
|
||||
|
||||
```json
|
||||
{ "beginRendering": {
|
||||
"surfaceId": "main",
|
||||
"catalogId": "https://a2ui.org/specification/v0_8/standard_catalog_definition.json",
|
||||
"root": "card-1"
|
||||
}}
|
||||
```
|
||||
|
||||
Acts as a **synchronization gate**: until the client sees this, it should
|
||||
buffer components/data without rendering. `catalogId` is optional —
|
||||
default is the v0.8 standard catalog. `styles` may also appear here for
|
||||
per-surface theme tokens.
|
||||
|
||||
### 2.4 `deleteSurface`
|
||||
|
||||
```json
|
||||
{ "deleteSurface": { "surfaceId": "main" } }
|
||||
```
|
||||
|
||||
Disposes the surface, its data model, and any subscriptions.
|
||||
|
||||
## 3. Client → server events
|
||||
|
||||
### 3.1 `userAction`
|
||||
|
||||
```json
|
||||
{ "userAction": {
|
||||
"name": "submit",
|
||||
"surfaceId": "main",
|
||||
"sourceComponentId": "btn-1",
|
||||
"timestamp": "2026-04-27T17:05:00Z",
|
||||
"context": { "email": "ada@example.com" }
|
||||
}}
|
||||
```
|
||||
|
||||
`context` is the **resolved** snapshot of the action's `context[]`
|
||||
(BoundValues evaluated against the data model at click time — see
|
||||
[`data-and-actions.md`](./data-and-actions.md)).
|
||||
|
||||
### 3.2 `error`
|
||||
|
||||
A client-side error reporting envelope. The spec leaves the body shape
|
||||
underspecified.
|
||||
|
||||
## 4. A2A extension (v0.8)
|
||||
|
||||
A2UI rides on **A2A** as a typed extension:
|
||||
|
||||
- Extension URI: `https://a2ui.org/a2a-extension/a2ui/v0.8`
|
||||
- Messages are A2A `DataPart` objects with `mimeType: "application/json+a2ui"`.
|
||||
- Capability negotiation:
|
||||
- **Agent advertises** in `AgentCapabilities.extensions`:
|
||||
- `supportedCatalogIds: string[]`
|
||||
- `acceptsInlineCatalogs: bool`
|
||||
- **Client declares** support via transport-specific signaling
|
||||
(`X-A2A-Extensions` HTTP header, gRPC metadata, JSON-RPC mechanism).
|
||||
- Client may include in A2A message metadata:
|
||||
```json
|
||||
{ "metadata": { "a2uiClientCapabilities": {
|
||||
"supportedCatalogIds": [ "https://a2ui.org/.../standard_catalog_definition.json" ],
|
||||
"inlineCatalogs": [ { "catalogId": "...", "components": {...}, "styles": {...} } ]
|
||||
}}}
|
||||
```
|
||||
- Server picks one in the next `beginRendering`.
|
||||
|
||||
The available spec text is partial — push/pull operations, retry,
|
||||
backpressure, and authentication details are **delegated to the A2A layer**
|
||||
or to implementations.
|
||||
|
||||
## 5. Lifecycle
|
||||
|
||||
1. Client opens an A2A session and announces capabilities.
|
||||
2. Server starts a JSONL stream:
|
||||
1. Emits `surfaceUpdate` and `dataModelUpdate` lines (any order).
|
||||
2. Emits `beginRendering` once the surface is render-ready.
|
||||
3. Client renders the tree rooted at `root`.
|
||||
4. User interacts → client emits `userAction` (A2A message, **not** on the
|
||||
JSONL stream).
|
||||
5. Server responds with more JSONL.
|
||||
6. Server emits `deleteSurface` when done, or session ends.
|
||||
|
||||
## 6. Implementation notes (deltas from raw spec)
|
||||
|
||||
These behaviors are spec-silent or under-specified; both reference
|
||||
implementations and this repo make pragmatic choices:
|
||||
|
||||
- **Line-delimited JSON parsing** must tolerate malformed lines gracefully —
|
||||
a single bad line MUST NOT abort the stream. Both impls log + skip.
|
||||
- **Size caps** on lines, components per surface, data-model entries.
|
||||
WinUI applies hard caps (1 MiB / 2000 / 1024); Lit does not.
|
||||
- **Modal lifecycle**: spec defines `entryPointChild` + `contentChild` but
|
||||
not _when_ the modal is open. Lit uses `<dialog>.showModal()` driven by
|
||||
internal state; WinUI uses a `ContentDialog` triggered by entry click.
|
||||
- **Streaming partial components**: a `surfaceUpdate` may reference an
|
||||
`id` whose contents arrive on a later line. Clients MUST defer rendering
|
||||
of undefined refs, not throw.
|
||||
@ -79,8 +79,6 @@ Add ALL needed commands to `gateway.nodes.allowCommands` in `~/.openclaw/opencla
|
||||
// Device metadata/status
|
||||
"device.info",
|
||||
"device.status",
|
||||
// Text-to-speech playback (enable only when agent-driven audio is desired)
|
||||
"tts.speak",
|
||||
// System (already in Windows defaults, but listed for completeness)
|
||||
// "system.run",
|
||||
// "system.run.prepare",
|
||||
@ -179,7 +177,7 @@ PR #159 originally explored session-based start/stop recording commands, but the
|
||||
|
||||
| Command | macOS | Windows | Notes |
|
||||
|---------|-------|---------|-------|
|
||||
| `browser.proxy` | ✅ | ✅ | Local browser-control bridge; requires browser control host on gateway port + 2, retries with password/basic auth if bearer auth is rejected, and managed SSH tunnel mode forwards local+2 to remote+2 when enabled |
|
||||
| `browser.proxy` | ✅ | ❌ | Chrome DevTools proxy |
|
||||
|
||||
### 2.6 Safe Gateway-Policy Gaps to Consider
|
||||
|
||||
@ -336,7 +334,7 @@ Recommended gateway defaults:
|
||||
| Command bucket | Windows default? | Reason |
|
||||
|----------------|------------------|--------|
|
||||
| Safe declared companion commands: `canvas.*`, `camera.list`, `location.get`, `screen.snapshot`, `device.info`, `device.status` | Yes | Matches macOS parity and only applies when declared by the node |
|
||||
| Dangerous/privacy-heavy commands: `camera.snap`, `camera.clip`, `screen.record`, `stt.transcribe`, write commands like `contacts.add` | No | Existing gateway model already requires explicit `gateway.nodes.allowCommands` |
|
||||
| Dangerous/privacy-heavy commands: `camera.snap`, `camera.clip`, `screen.record`, write commands like `contacts.add` | No | Existing gateway model already requires explicit `gateway.nodes.allowCommands` |
|
||||
| Exec commands: `system.run`, `system.run.prepare`, `system.which`, `system.notify`, `browser.proxy` | Yes | Existing Windows headless-host behavior |
|
||||
|
||||
Until the gateway expands Windows safe defaults, the practical local solution is:
|
||||
@ -345,50 +343,25 @@ Until the gateway expands Windows safe defaults, the practical local solution is
|
||||
2. Configure `gateway.nodes.allowCommands` for the Windows companion features.
|
||||
3. Re-pair after command-list changes because the gateway snapshots commands at approval time.
|
||||
|
||||
### 5.1 Gateway Node Allowlist Configuration
|
||||
|
||||
`gateway.nodes.allowCommands` is the explicit opt-in list the gateway uses after platform defaults. It should contain exact command names, not broad wildcard grants, for commands that are safe but not yet in the Windows default policy.
|
||||
|
||||
Recommended safe Windows companion allowlist:
|
||||
|
||||
```bash
|
||||
openclaw config set gateway.nodes.allowCommands '["canvas.present","canvas.hide","canvas.navigate","canvas.eval","canvas.snapshot","canvas.a2ui.push","canvas.a2ui.pushJSONL","canvas.a2ui.reset","camera.list","location.get","screen.snapshot","device.info","device.status","system.execApprovals.get","system.execApprovals.set"]'
|
||||
openclaw gateway restart
|
||||
```
|
||||
|
||||
`gateway.nodes.denyCommands` can be used as a final explicit blocklist when you want to suppress a command even if a platform default or allowlist entry would otherwise allow it.
|
||||
|
||||
Privacy-sensitive commands should stay out of the default safe list and should only be added deliberately:
|
||||
|
||||
```text
|
||||
camera.snap
|
||||
camera.clip
|
||||
screen.record
|
||||
stt.transcribe
|
||||
```
|
||||
|
||||
After changing either `gateway.nodes.allowCommands` or `gateway.nodes.denyCommands`, re-approve or re-pair the Windows node. Approved device records may keep a snapshot of the commands that were visible at approval time, so a gateway restart alone may not refresh existing approvals.
|
||||
|
||||
### 5.2 Immediate Code Fixes (This Branch)
|
||||
### 5.1 Immediate Code Fixes (This Branch)
|
||||
|
||||
- [x] Rename `screen.capture` → `screen.snapshot` in `ScreenCapability.cs`
|
||||
- [x] Remove `screen.list` from declared commands
|
||||
- [x] Remove debug logging from `WindowsNodeClient.cs`
|
||||
- [x] Add Mac-compatible fixed-duration `screen.record`; do not add `screen.list` or record start/stop commands
|
||||
|
||||
### 5.3 Setup Wizard Improvements
|
||||
### 5.2 Setup Wizard Improvements
|
||||
|
||||
- [x] Send `bootstrapToken` in correct field: `auth.bootstrapToken` not `auth.token`
|
||||
- [x] Handle `hello-ok.auth.deviceToken` — save it for future connections
|
||||
- [x] Accept QR images and clipboard setup content as alternate ways to enter the same bootstrap payload
|
||||
- [x] Show "auto-paired!" vs "waiting for approval" based on auth method
|
||||
- [x] Handle bootstrap token expiry gracefully when setup code payloads include expiry metadata (`expiresAt`, `expires_at`, `expires`, `expiry`, or `exp`)
|
||||
- [x] Add Settings toggles for optional Windows node capability groups (`canvas`, `screen`, `camera`, `location`, `browser.proxy`)
|
||||
- [ ] Show "auto-paired!" vs "waiting for approval" based on auth method
|
||||
- [ ] Handle bootstrap token expiry gracefully (re-generate if expired)
|
||||
|
||||
### 5.4 Upstream Contributions / Issues to File
|
||||
### 5.3 Upstream Contributions / Issues to File
|
||||
|
||||
- [x] **Request Windows/macOS parity for safe declared commands** — Windows should allow the same safe companion commands macOS does, while dangerous commands stay explicit opt-in. Draft included below.
|
||||
- [x] **Document `gateway.nodes.allowCommands`** — local Windows integration docs now describe allowCommands, denyCommands, safe parity commands, privacy-sensitive opt-ins, and re-pair requirements.
|
||||
- [ ] **Request Windows/macOS parity for safe declared commands** — Windows should allow the same safe companion commands macOS does, while dangerous commands stay explicit opt-in.
|
||||
- [ ] **Document `gateway.nodes.allowCommands`** — it's not in the config reference page
|
||||
- [x] **Add `canvas.a2ui.pushJSONL`** — current Mac supports it as a legacy JSONL alias; Windows routes it through the same A2UI push handler
|
||||
|
||||
#### Upstream issue draft
|
||||
@ -425,14 +398,13 @@ Proposal:
|
||||
- `camera.snap`
|
||||
- `camera.clip`
|
||||
- `screen.record`
|
||||
- `stt.transcribe`
|
||||
- write commands such as `contacts.add`, `calendar.add`, etc.
|
||||
|
||||
This does not grant capabilities to headless Windows hosts by itself. A command still has to pass both gates: the node must declare it in `commands`, and the gateway policy must allow it. Headless Windows node hosts that only declare `system.run` / `system.which` remain exec-only.
|
||||
|
||||
Related documentation gap: `gateway.nodes.allowCommands` and `gateway.nodes.denyCommands` should be documented in the gateway configuration reference, including the requirement to re-pair after command-list changes because approved pairing records snapshot declared commands.
|
||||
|
||||
### 5.5 User-Facing Documentation
|
||||
### 5.4 User-Facing Documentation
|
||||
|
||||
When shipping the Windows node, README/wiki should tell users:
|
||||
|
||||
@ -443,7 +415,7 @@ When shipping the Windows node, README/wiki should tell users:
|
||||
> ```
|
||||
> Then re-pair the node (`openclaw devices reject <old-id>` + re-approve).
|
||||
>
|
||||
> Add `camera.snap`, `camera.clip`, `screen.record`, and `stt.transcribe` only when you explicitly want to allow privacy-sensitive camera, screen, or microphone capture.
|
||||
> Add `camera.snap`, `camera.clip`, and `screen.record` only when you explicitly want to allow privacy-sensitive camera or screen capture.
|
||||
>
|
||||
> The Windows tray Command Center (`openclaw://commandcenter`) surfaces these policy problems directly: it separates safe companion allowlist fixes from privacy-sensitive opt-ins and provides copyable repair text for safe fixes or pending pairing approval.
|
||||
|
||||
|
||||
@ -1,369 +0,0 @@
|
||||
# OpenClaw Windows local gateway: WSL-owner Q&A
|
||||
|
||||
This document is the structured record of the questions we asked Craig Loewen
|
||||
(WSL) about the Windows OpenClaw local-gateway design, and Craig's answers.
|
||||
It is the canonical "why does the architecture look like this?" reference
|
||||
for the Windows local-gateway PR.
|
||||
|
||||
Companion: [`docs/wsl-owner-validation.md`](wsl-owner-validation.md)
|
||||
describes the resulting design as it ships.
|
||||
|
||||
**Status legend:** ✅ Answered (verbatim or paraphrased Craig answer
|
||||
recorded). 🟡 Open.
|
||||
|
||||
**Source:** Craig Loewen's review of the prototype `wsl-owner-open-issues.md`
|
||||
(2026-05-04). His answers are summarized authoritatively in
|
||||
`.squad/decisions.md` under "Decision: Craig Loewen's WSL Answers
|
||||
(Authoritative)" and underpinned the Phase 3 plan revision in
|
||||
`.squad/decisions-archive.md`. The architecture statements below are
|
||||
paraphrased; Mike's relayed verbatim Q&A lives in the squad decisions thread,
|
||||
not in the public PR.
|
||||
|
||||
The design is built on three coupled choices:
|
||||
|
||||
1. **Distribution model:** create a dedicated `OpenClawGateway` instance from
|
||||
the Store Ubuntu-24.04 package and configure it post-install — no custom
|
||||
OpenClaw rootfs.
|
||||
2. **Networking model:** loopback only between the Windows tray and the
|
||||
gateway in WSL — no WSL-IP fallback, no `lan`/`auto` bind.
|
||||
3. **Lifecycle model:** instance-scoped `wsl --terminate OpenClawGateway` for
|
||||
repair; user-systemd plus a tray-owned keepalive for liveness; no global
|
||||
`wsl --shutdown` and no global `.wslconfig` mutation.
|
||||
|
||||
The goal remains a low-maintenance implementation that uses the public
|
||||
OpenClaw Linux installer unchanged and does not maintain a custom OpenClaw
|
||||
Linux distribution.
|
||||
|
||||
## Final shape
|
||||
|
||||
1. The Windows tray verifies WSL/WSL2 availability.
|
||||
2. The tray creates a dedicated WSL2 instance named `OpenClawGateway` from
|
||||
the Store Ubuntu-24.04 package:
|
||||
```powershell
|
||||
wsl.exe --install Ubuntu-24.04 `
|
||||
--name OpenClawGateway `
|
||||
--location "$env:LOCALAPPDATA\OpenClawTray\wsl" `
|
||||
--no-launch `
|
||||
--version 2
|
||||
```
|
||||
3. The tray launches the instance as root and applies OpenClaw-owned
|
||||
configuration:
|
||||
- create the `openclaw` user;
|
||||
- create `/home/openclaw/.openclaw`, `/opt/openclaw`,
|
||||
`/var/lib/openclaw`, and `/var/log/openclaw`;
|
||||
- write `/etc/wsl.conf` and `/etc/wsl-distribution.conf`;
|
||||
- set the default user to `openclaw` via
|
||||
`wsl --manage OpenClawGateway --set-default-user openclaw`;
|
||||
- terminate only `OpenClawGateway` so WSL config takes effect.
|
||||
4. The tray runs the public OpenClaw Linux installer inside the instance:
|
||||
`https://openclaw.ai/install-cli.sh` with prefix `/opt/openclaw`. No
|
||||
forked or patched gateway installer.
|
||||
5. The tray uses upstream OpenClaw CLI/service commands to configure and
|
||||
start the gateway.
|
||||
6. The tray calls upstream `openclaw qr --json`, consumes the upstream
|
||||
setup-code/bootstrap-token handoff, and pairs Windows tray operator and
|
||||
Windows tray node sessions; both device tokens land in
|
||||
`%APPDATA%\OpenClawTray\device-key-ed25519.json`.
|
||||
|
||||
## Issue 1: Ubuntu Store package + post-install configuration
|
||||
|
||||
### Q1.1 — Is `wsl --install Ubuntu-24.04 --name OpenClawGateway --location ... --no-launch --version 2` a supported primitive for a Windows app creating a dedicated app-owned WSL instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Yes — supportable. This is the canonical primitive for an
|
||||
app-owned WSL instance.
|
||||
|
||||
**Implication:** `LocalGatewaySetup.cs` issues exactly this command. The
|
||||
clean port removed `--web-download`, `--from-file`, and any rootfs-import
|
||||
fallback.
|
||||
|
||||
### Q1.2 — Is it acceptable to treat the install as successful when post-conditions pass, even if the `wsl --install` process itself hangs or exits unclearly?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Trust the exit code.** The hang-fallback pattern from the
|
||||
prototype is not needed.
|
||||
|
||||
**Implication:** The clean engine treats `wsl --install` exit 0 as the
|
||||
success signal, and additionally confirms `OpenClawGateway` appears in
|
||||
`wsl --list --quiet` to defend against the "winget-style" failure mode where
|
||||
exit 0 reports success without registering a distro (see Q1.3). Non-zero
|
||||
exit ⇒ install failure; no postcondition-on-hang path.
|
||||
|
||||
### Q1.3 — Should we prefer generic `Ubuntu`, explicit `Ubuntu-24.04`, `--web-download`, `--from-file`, or another source for the default path?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Use **explicit `Ubuntu-24.04`**, not generic `Ubuntu`. No
|
||||
`--web-download` and no `--from-file` are needed.
|
||||
|
||||
**Implication:** The clean install command is pinned to `Ubuntu-24.04`. The
|
||||
prototype's "generic `Ubuntu` channel was more reliable on this dev machine"
|
||||
observation is not a basis for a final product default.
|
||||
|
||||
Empirical confirmation (2026-05-04, 20-iter harness on Windows 10.0.26200,
|
||||
WSL 2.6.3.0): `wsl --install Ubuntu-24.04 --name <gen> --location <path>
|
||||
--no-launch --version 2` succeeded **10/10**; `winget install --id
|
||||
Canonical.Ubuntu.2404 -e --silent --accept-source-agreements
|
||||
--accept-package-agreements --disable-interactivity` succeeded **0/10**
|
||||
(stages the launcher APPX but never registers a WSL distro under
|
||||
`--silent --disable-interactivity`). Raw artifacts:
|
||||
`artifacts/wsl-install-vs-winget/run-20260504-131837/summary.json`.
|
||||
|
||||
### Q1.4 — What is the recommended enterprise/offline fallback when Store access is blocked?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Modern WSL distributions are no longer Store-gated; an offline
|
||||
fallback is **not needed** for this PR.
|
||||
|
||||
**Implication:** No offline fallback path ships in this PR. If a future
|
||||
enterprise scenario surfaces a real blocker, that decision can be revisited
|
||||
separately.
|
||||
|
||||
### Q1.5 — Are `automount=false`, `interop=false`, and `appendWindowsPath=false` appropriate for this managed instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Yes — all three settings are appropriate for an app-owned
|
||||
appliance.
|
||||
|
||||
**Implication:** `/etc/wsl.conf` ships with all three disabled (see
|
||||
`docs/wsl-owner-validation.md`).
|
||||
|
||||
### Q1.6 — Are there WSL/systemd/machine-id/DNS/timezone details we should explicitly repair or validate after cloning/configuring an Ubuntu instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No post-clone repairs needed** — machine-id / DNS / timezone
|
||||
work as delivered.
|
||||
|
||||
**Implication:** The setup engine does not regenerate `/etc/machine-id`,
|
||||
does not rewrite `/etc/resolv.conf`, and does not touch timezone state. It
|
||||
relies on `useWindowsTimezone=true` in `/etc/wsl.conf` for clock alignment.
|
||||
|
||||
### Q1.7 — Should OpenClaw avoid writing `/etc/wsl-distribution.conf`, or is it appropriate to suppress shortcuts/terminal profile for the dedicated instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Use both `wsl.conf` and `wsl-distribution.conf`. Suppressing
|
||||
shortcut/terminal entries is the correct application of
|
||||
`wsl-distribution.conf` for a privately managed instance.
|
||||
|
||||
**Implication:** The setup engine writes `/etc/wsl-distribution.conf` with
|
||||
`shortcut.enabled=false` and `terminal.enabled=false`.
|
||||
|
||||
## Issue 2: Local networking between Windows and the WSL gateway
|
||||
|
||||
### Q2.1 — Is Windows localhost forwarding to a WSL2 service reliable enough to make `loopback` the final default?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Yes — loopback only.** Windows localhost forwarding to a WSL2
|
||||
service is a reliable core WSL promise.
|
||||
|
||||
**Implication:** Gateway binds to loopback inside WSL on `:18789`. Windows
|
||||
tray connects via `http://localhost:18789` / `ws://localhost:18789`. The
|
||||
prototype's earlier observations of localhost-forwarding flakiness were
|
||||
attributed to other lifecycle issues (see Issue 3) and not to the forwarding
|
||||
contract itself.
|
||||
|
||||
### Q2.2 — If localhost forwarding fails, is WSL-IP fallback a supported/recommended pattern for a Windows app-owned WSL instance?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** WSL-IP fallback is not the recommended pattern.
|
||||
|
||||
**Implication:** The clean port has **no** WSL-IP fallback. The endpoint
|
||||
resolver does not enumerate WSL interface addresses, does not run
|
||||
`hostname -I` / `ip -4 addr` / `ip route` / `ss -ltnp` inside WSL, and
|
||||
returns exactly one candidate: `http://localhost:18789`.
|
||||
|
||||
### Q2.3 — Is `gateway.bind=lan` inside the WSL instance acceptable for the fallback path, assuming the Windows tray still only advertises/selects local endpoints by default?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No** — loopback only.
|
||||
|
||||
**Implication:** The setup engine never writes `gateway.bind=lan`. The
|
||||
runtime configuration surface for `gateway.bind` was removed.
|
||||
|
||||
### Q2.4 — Should we implement `auto` bind promotion instead of defaulting to `lan`?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** Loopback only; no `auto` promotion.
|
||||
|
||||
**Implication:** No promotion logic exists in the clean port. There is one
|
||||
bind mode, and it is loopback.
|
||||
|
||||
### Q2.5 — Are there WSL NAT, mirrored networking, firewall, or portproxy recommendations we should follow while still avoiding global `.wslconfig` changes?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** No — loopback forwarding works without any of those
|
||||
modifications.
|
||||
|
||||
**Implication:** The tray does not write to `.wslconfig`, does not configure
|
||||
mirrored networking, does not add Windows firewall rules, and does not run
|
||||
`netsh interface portproxy` for normal local-gateway operation.
|
||||
|
||||
### Q2.6 — What diagnostics should we capture before asking users/maintainers to file WSL networking bugs?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Point at **<https://aka.ms/wsllogs>**. Do not scrape WSL internal
|
||||
log files from the product.
|
||||
|
||||
**Implication:** On any setup or networking failure, the
|
||||
`LocalSetupProgressPage` shows an aka.ms/wsllogs hint, the validation
|
||||
script's `Save-DiagnosticsSnapshot` records `wslLogsHelp =
|
||||
https://aka.ms/wsllogs`, and the run summary appends a "Diagnostics: see
|
||||
https://aka.ms/wsllogs..." note. The product captures only its own state
|
||||
(Windows-side `:18789` listener snapshot, loopback `/health` probe,
|
||||
redacted setup-state.json) and a generated repro guide.
|
||||
|
||||
## Issue 3: WSL gateway lifecycle and service ownership
|
||||
|
||||
### Q3.1 — For an app-owned WSL appliance, should the gateway be a user-systemd service, a root/system service wrapper, or something else?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Both **user-systemd** and a **tray-owned keepalive** are
|
||||
acceptable for this shape.
|
||||
|
||||
**Implication:** The clean port uses upstream OpenClaw service primitives
|
||||
under the `openclaw` user, plus a tray-owned WSL keepalive
|
||||
(`wsl.exe -d OpenClawGateway -u openclaw -- sleep 2147483647`) while
|
||||
local-gateway mode is active. Readiness still requires Windows-side
|
||||
`/health` to succeed — `systemctl active` alone does not imply Windows
|
||||
reachability.
|
||||
|
||||
### Q3.2 — Is `loginctl enable-linger openclaw` expected to be reliable in this WSL shape, or should we avoid depending on it?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Linger is acceptable for this shape (alongside the tray
|
||||
keepalive).
|
||||
|
||||
**Implication:** Setup runs `loginctl enable-linger openclaw`. The tray
|
||||
keepalive remains as belt-and-suspenders for the active local-gateway
|
||||
window.
|
||||
|
||||
### Q3.3 — Is a tray-owned keepalive process acceptable, or should it be treated as validation-only?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Acceptable as a product primitive (see Q3.1). It is not
|
||||
validation-only.
|
||||
|
||||
**Implication:** The keepalive ships as part of the runtime, not just as a
|
||||
test scaffold.
|
||||
|
||||
### Q3.4 — Is instance-scoped `wsl --terminate OpenClawGateway` the right repair/restart primitive?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **Yes.** Use `wsl --terminate OpenClawGateway` only. **Never**
|
||||
global `wsl --shutdown`.
|
||||
|
||||
**Implication:** Setup, repair, validation, and removal paths all use
|
||||
`wsl --terminate OpenClawGateway`. `git grep 'wsl --shutdown'` over the
|
||||
clean worktree returns no product or validation hits.
|
||||
|
||||
### Q3.5 — Are there cases where global `wsl --shutdown` is recommended or unavoidable, despite our desire to avoid it?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** **No.** Do not issue `wsl --shutdown` from this product.
|
||||
|
||||
**Implication:** Recreate / FreshMachine validation scenarios use
|
||||
`wsl --unregister OpenClawGateway` for destructive cleanup. They never
|
||||
issue a global shutdown.
|
||||
|
||||
### Q3.6 — What lifecycle diagnostics should the tray collect when WSL reports the service active but Windows cannot connect?
|
||||
|
||||
**Status:** ✅ Answered.
|
||||
|
||||
**Craig:** Same answer as Q2.6 — point at <https://aka.ms/wsllogs>; the
|
||||
product should not scrape WSL logs.
|
||||
|
||||
**Implication:** The product collects only its own state and points at the
|
||||
WSL-team-owned diagnostics page. See Q2.6.
|
||||
|
||||
## Mac app comparison: operator vs node
|
||||
|
||||
The macOS app runs operator/UI and a local Mac node from the same app
|
||||
binary/process via separate gateway sessions:
|
||||
|
||||
- `GatewayConnection.shared` owns one `GatewayChannelActor` for
|
||||
operator/UI scopes (`role: "operator"`, `clientMode: "ui"`).
|
||||
- `MacNodeModeCoordinator.shared.start()` owns a separate
|
||||
`GatewayNodeSession` and `MacNodeRuntime` (`role: "node"`,
|
||||
`clientId: "openclaw-macos"`, capabilities for canvas / screen / browser
|
||||
/ etc.), connecting to the same gateway URL over a distinct WebSocket.
|
||||
- In local mode, `GatewayProcessManager` manages the local gateway via
|
||||
launchd / OpenClaw CLI behavior; in remote mode,
|
||||
`ConnectionModeCoordinator` stops the local gateway and uses
|
||||
`NodeServiceManager.start()` against the remote gateway.
|
||||
|
||||
**Implication for Windows (decided by Mike):** The Windows tray pairs as
|
||||
**both operator and node** against the local gateway, mirroring the macOS
|
||||
in-app node model. There is **no separate WSL-internal worker** in this
|
||||
PR. `StartWorker` / `PairWorker` phases were dropped; the
|
||||
`PreserveWorkerData` parameter and `worker_data_preserved` lifecycle step
|
||||
were removed in Phase 3 cleanup.
|
||||
|
||||
If a future scope adds a Linux worker inside the WSL gateway instance, it
|
||||
will require a separate upstream-supported install/start/list proof and a
|
||||
new owner decision — not a re-litigation of the current PR.
|
||||
|
||||
## Architectural decisions captured
|
||||
|
||||
For traceability, the high-order decisions implied by Craig's answers are:
|
||||
|
||||
1. **Distribution model** — Store Ubuntu-24.04 + post-install configuration;
|
||||
no custom rootfs; no offline fallback. (Q1.1, Q1.3, Q1.4)
|
||||
2. **Configuration** — `wsl.conf` (systemd, automount/interop/appendPath
|
||||
off, default user `openclaw`, `useWindowsTimezone=true`) +
|
||||
`wsl-distribution.conf` (no shortcut, no terminal). No post-clone
|
||||
repairs. (Q1.5, Q1.6, Q1.7)
|
||||
3. **Networking** — Loopback only, port 18789. No WSL-IP fallback. No
|
||||
`lan`/`auto` bind. No `.wslconfig` / portproxy / firewall mutation.
|
||||
(Q2.1–Q2.5)
|
||||
4. **Lifecycle** — User-systemd + tray keepalive. Linger acceptable.
|
||||
`wsl --terminate OpenClawGateway` for repair. **Never** global
|
||||
`wsl --shutdown`. (Q3.1–Q3.5)
|
||||
5. **Diagnostics** — `https://aka.ms/wsllogs`. No internal log scraping.
|
||||
(Q2.6, Q3.6)
|
||||
6. **Roles in scope** — Windows tray operator + Windows tray node.
|
||||
Worker-in-WSL out of scope. (Mac app comparison + Mike's Phase-0
|
||||
decision.)
|
||||
|
||||
These decisions are reflected one-for-one in:
|
||||
|
||||
- `src/OpenClaw.Tray.WinUI/Services/LocalGatewaySetup/LocalGatewaySetup.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/App.xaml.cs` (factory + identity-path wiring)
|
||||
- `src/OpenClaw.Tray.WinUI/Services/NodeService.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Pages/SetupWarningPage.cs`
|
||||
- `src/OpenClaw.Tray.WinUI/Onboarding/Pages/LocalSetupProgressPage.cs`
|
||||
- `scripts/validate-wsl-gateway.ps1` (4 scenarios)
|
||||
- `scripts/reset-openclaw-wsl-validation-state.ps1` (exact-target gated
|
||||
cleanup)
|
||||
|
||||
## Open follow-ups
|
||||
|
||||
These are not open architecture questions for Craig — they are tracked
|
||||
work items that intentionally fall outside this PR:
|
||||
|
||||
- **Off-box / LAN / phone reachability via OpenClaw relay.** Blocked on
|
||||
relay ownership / protocol clarity. Not addressed in this PR.
|
||||
- **`winget install Microsoft.WSL` as a platform repair fallback.** Deeper
|
||||
research in flight; does not change the Phase 3 decision to use
|
||||
`wsl --install` for distro creation in this PR.
|
||||
- **Onboarding copy localization.** `Onboarding_SetupWarning_*` /
|
||||
`Onboarding_LocalSetupProgress_*` resw entries to be added across
|
||||
supported locales after Mike signs off final copy.
|
||||
|
||||
No open questions for Craig remain that block this PR.
|
||||
@ -1,384 +0,0 @@
|
||||
# OpenClaw Windows local gateway: WSL design validation
|
||||
|
||||
This document describes the WSL design that ships in this PR. It reflects Craig
|
||||
Loewen's authoritative review of `docs/wsl-owner-open-issues.md` (his answers
|
||||
are recorded, paraphrased, in that companion doc). Where the prototype enumerated
|
||||
options, this version states the chosen design.
|
||||
|
||||
The current scope is:
|
||||
|
||||
- A dedicated app-owned **Ubuntu-24.04** WSL2 instance named `OpenClawGateway`,
|
||||
created from the standard Ubuntu Store package and then configured by the
|
||||
Windows tray.
|
||||
- The public OpenClaw Linux installer (`https://openclaw.ai/install-cli.sh`)
|
||||
runs unchanged inside that instance with prefix `/opt/openclaw`.
|
||||
- **Loopback-only** local networking (`http://localhost:18789`) between the
|
||||
Windows tray and the gateway.
|
||||
- Repair / restart via instance-scoped `wsl --terminate OpenClawGateway`.
|
||||
- Diagnostics on failure pointed at <https://aka.ms/wsllogs>.
|
||||
- The Windows tray pairs as both **operator** and **node** against the local
|
||||
gateway (matching the macOS app's in-app node model). No worker-in-WSL is
|
||||
installed by the Windows tray in this PR.
|
||||
|
||||
Out of scope for this PR (explicitly):
|
||||
|
||||
- No custom OpenClaw rootfs / OpenClaw-distributed Linux image.
|
||||
- No `--web-download` / `--from-file` / signed offline-base-artifact fallback.
|
||||
- No WSL-IP / `lan` / `auto`-bind fallback. No `gateway.bind` overrides.
|
||||
- No global `.wslconfig` mutation. No global `wsl --shutdown` from any product
|
||||
or validation path.
|
||||
- No `\\wsl$` or `\\wsl.localhost` file I/O. All WSL file operations go through
|
||||
`wsl.exe -d OpenClawGateway -- ...`.
|
||||
|
||||
## High-level user experience
|
||||
|
||||
1. User installs or opens the Windows tray app.
|
||||
2. The first onboarding page (`SetupWarningPage`) offers **Set up locally**
|
||||
(default) or **Advanced setup**.
|
||||
3. **Set up locally** opens `LocalSetupProgressPage`, which drives
|
||||
`LocalGatewaySetupEngine` to:
|
||||
- preflight the WSL host;
|
||||
- create the `OpenClawGateway` instance from Ubuntu-24.04;
|
||||
- apply OpenClaw-owned WSL configuration (`/etc/wsl.conf`,
|
||||
`/etc/wsl-distribution.conf`, `openclaw` user, state directories);
|
||||
- install OpenClaw via the public installer;
|
||||
- prepare and start the gateway service;
|
||||
- mint a bootstrap setup-code via `openclaw qr --json`;
|
||||
- pair the Windows tray operator and Windows tray node;
|
||||
- verify end-to-end reachability over loopback.
|
||||
4. On terminal failure, the page surfaces a link to <https://aka.ms/wsllogs>;
|
||||
no internal log scraping is attempted.
|
||||
|
||||
## End-state architecture
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
subgraph Windows["Windows user session"]
|
||||
Tray["OpenClaw Tray app"]
|
||||
Identity["%APPDATA%\OpenClawTray\<br/>device-key-ed25519.json (operator + node)"]
|
||||
Engine["LocalGatewaySetupEngine"]
|
||||
WslFeature["Windows WSL platform"]
|
||||
end
|
||||
|
||||
subgraph WSL["WSL2: OpenClawGateway"]
|
||||
Ubuntu["Ubuntu-24.04 (Store)"]
|
||||
WslConf["/etc/wsl.conf<br/>systemd=true<br/>automount=false<br/>interop=false<br/>appendWindowsPath=false<br/>default user=openclaw"]
|
||||
DistroConf["/etc/wsl-distribution.conf<br/>shortcut=false<br/>terminal=false"]
|
||||
Systemd["systemd"]
|
||||
Installer["public installer<br/>install-cli.sh<br/>--prefix /opt/openclaw"]
|
||||
GatewaySvc["openclaw gateway<br/>bind=loopback :18789"]
|
||||
State["/var/lib/openclaw"]
|
||||
end
|
||||
|
||||
Tray --> Engine
|
||||
Engine -->|"wsl --install Ubuntu-24.04 --name OpenClawGateway --location %LOCALAPPDATA%\OpenClawTray\wsl --no-launch --version 2"| WslFeature
|
||||
WslFeature --> Ubuntu
|
||||
Ubuntu --> WslConf
|
||||
Ubuntu --> DistroConf
|
||||
WslConf --> Systemd
|
||||
Engine -->|"wsl -d OpenClawGateway -u root -- bash install-cli.sh"| Installer
|
||||
Installer --> GatewaySvc
|
||||
Systemd --> GatewaySvc
|
||||
GatewaySvc --> State
|
||||
Tray -->|"http://localhost:18789 (operator + node WebSocket sessions)"| GatewaySvc
|
||||
Tray --> Identity
|
||||
```
|
||||
|
||||
## WSL touch points
|
||||
|
||||
### Dedicated WSL instance lifecycle
|
||||
|
||||
The tray treats WSL as an application-owned runtime boundary and uses a single
|
||||
dedicated WSL2 instance named `OpenClawGateway`. The base is **Ubuntu-24.04**
|
||||
from the Store; the OpenClaw-owned configuration is applied after the instance
|
||||
is laid down.
|
||||
|
||||
| Operation | WSL command | Scope |
|
||||
| --- | --- | --- |
|
||||
| Preflight | `wsl.exe --status`, `wsl.exe --list --verbose` | Read-only WSL capability checks |
|
||||
| Instance creation | `wsl.exe --install Ubuntu-24.04 --name OpenClawGateway --location %LOCALAPPDATA%\OpenClawTray\wsl --no-launch --version 2` | Creates only the dedicated OpenClaw instance |
|
||||
| In-instance configuration | `wsl.exe -d OpenClawGateway -u root -- ...` | Writes `/etc/wsl.conf`, `/etc/wsl-distribution.conf`, creates `openclaw` user and state dirs |
|
||||
| Default user | `wsl.exe --manage OpenClawGateway --set-default-user openclaw` | Locks default user to `openclaw` |
|
||||
| Apply config | `wsl.exe --terminate OpenClawGateway` (then implicit restart on next command) | Picks up `wsl.conf` changes |
|
||||
| Public OpenClaw install | `wsl.exe -d OpenClawGateway -u root -- bash -c "curl -fsSL https://openclaw.ai/install-cli.sh \| bash -s -- --prefix /opt/openclaw"` | Runs the public installer unchanged |
|
||||
| Service start/check | `wsl.exe -d OpenClawGateway -u root -- systemctl ...` | Starts/checks OpenClaw gateway |
|
||||
| Repair | `wsl.exe --terminate OpenClawGateway` | Instance-scoped restart only |
|
||||
| Remove | `wsl.exe --terminate OpenClawGateway`, `wsl.exe --unregister OpenClawGateway` | Requires explicit user confirmation |
|
||||
|
||||
Guarantees:
|
||||
|
||||
- **WSL2 only** for the OpenClaw instance.
|
||||
- The tray never modifies the user's default WSL instance.
|
||||
- The tray never modifies global `.wslconfig`.
|
||||
- The tray never calls global `wsl.exe --shutdown` in any product, validation,
|
||||
repair, or removal path.
|
||||
- The tray never unregisters arbitrary WSL instances; only the exact
|
||||
`OpenClawGateway` name is eligible, and destructive cleanup requires explicit
|
||||
confirmation in scripts.
|
||||
|
||||
### Install command and success criterion
|
||||
|
||||
The single canonical install primitive is:
|
||||
|
||||
```powershell
|
||||
wsl.exe --install Ubuntu-24.04 `
|
||||
--name OpenClawGateway `
|
||||
--location "$env:LOCALAPPDATA\OpenClawTray\wsl" `
|
||||
--no-launch `
|
||||
--version 2
|
||||
```
|
||||
|
||||
Success criterion (per Craig): **trust the `wsl --install` exit code**.
|
||||
There is no postcondition-on-hang fallback. After exit, the engine confirms
|
||||
that `OpenClawGateway` appears in `wsl --list --quiet`; failure of that
|
||||
post-condition is treated as install failure regardless of the exit code or stdout.
|
||||
|
||||
`Ubuntu-24.04` is used explicitly (not the generic `Ubuntu` channel). No
|
||||
`--web-download` and no `--from-file` are used; there is no offline base
|
||||
fallback in this PR.
|
||||
|
||||
#### Empirical evidence
|
||||
|
||||
The documented recommendation (`wsl --install` over `winget install
|
||||
Canonical.Ubuntu.2404`) was confirmed empirically on 2026-05-04 with a 20-iter
|
||||
harness:
|
||||
|
||||
| Path | success | failure | strict success rate |
|
||||
|---|---:|---:|---|
|
||||
| `wsl --install Ubuntu-24.04 --name <gen> --location <path> --no-launch --version 2` | 10 | 0 | **10/10** |
|
||||
| `winget install --id Canonical.Ubuntu.2404 -e --silent --accept-source-agreements --accept-package-agreements --disable-interactivity` | 0 | 10 | **0/10** |
|
||||
|
||||
Success ≡ exit 0 AND target distro registered in `wsl --list --quiet`.
|
||||
|
||||
Root cause for winget 0/10: `Canonical.Ubuntu.2404` is the launcher APPX, not
|
||||
a WSL distro creator; with `--silent --disable-interactivity` the launcher is
|
||||
never invoked, so the APPX stages but no distro registers. winget cannot pass
|
||||
`--name` or `--location` to the launcher.
|
||||
|
||||
Harness, raw timings, exit codes, and per-iteration `detail.json`:
|
||||
`artifacts/wsl-install-vs-winget/run-20260504-131837/summary.json`. (The
|
||||
`artifacts/` tree is gitignored; the summary will be present on any host that
|
||||
runs `scripts/experiments/wsl-install-vs-winget-empirical-2026-05-04.ps1`.)
|
||||
|
||||
A deeper winget research thread is in flight (Aaron-9, prototype worktree).
|
||||
That work may broaden the picture for `winget install Microsoft.WSL` as a
|
||||
**platform** repair fallback — it does not change the Phase 3 decision to use
|
||||
`wsl --install` for distro creation in this PR.
|
||||
|
||||
### `/etc/wsl.conf`
|
||||
|
||||
```ini
|
||||
[boot]
|
||||
systemd=true
|
||||
|
||||
[automount]
|
||||
enabled=false
|
||||
mountFsTab=false
|
||||
|
||||
[interop]
|
||||
enabled=false
|
||||
appendWindowsPath=false
|
||||
|
||||
[user]
|
||||
default=openclaw
|
||||
|
||||
[time]
|
||||
useWindowsTimezone=true
|
||||
```
|
||||
|
||||
Rationale (Craig confirmed all settings appropriate for an app-owned
|
||||
appliance):
|
||||
|
||||
- `systemd=true` — gateway is a systemd-managed service.
|
||||
- `automount.enabled=false` / `mountFsTab=false` — the gateway does not need
|
||||
Windows drive mounts.
|
||||
- `interop.enabled=false` / `appendWindowsPath=false` — the appliance does not
|
||||
shell out to Windows binaries.
|
||||
- `default=openclaw` — non-root default user; root only via explicit
|
||||
`wsl.exe -d OpenClawGateway -u root -- ...`.
|
||||
- `useWindowsTimezone=true` — gateway timestamps align with the user's
|
||||
Windows session.
|
||||
|
||||
Per Craig: no post-clone repairs needed (machine-id / DNS / timezone work as
|
||||
delivered by Ubuntu-24.04).
|
||||
|
||||
### `/etc/wsl-distribution.conf`
|
||||
|
||||
```ini
|
||||
[oobe]
|
||||
defaultName=OpenClawGateway
|
||||
|
||||
[shortcut]
|
||||
enabled=false
|
||||
|
||||
[terminal]
|
||||
enabled=false
|
||||
```
|
||||
|
||||
Rationale: the OpenClaw instance is an implementation detail; users should not
|
||||
see a Start menu shortcut or Windows Terminal profile for it. Craig confirmed
|
||||
this is the correct use of `wsl-distribution.conf` for a privately managed
|
||||
instance.
|
||||
|
||||
### Networking — loopback only
|
||||
|
||||
The gateway binds to **loopback inside WSL on port 18789**. The Windows tray
|
||||
connects via `http://localhost:18789` / `ws://localhost:18789`.
|
||||
|
||||
Per Craig: Windows localhost forwarding to a WSL2 service is a reliable core
|
||||
WSL promise. **No** WSL-IP fallback. **No** `lan` or `auto` bind. **No**
|
||||
`gateway.bind` overrides written by the tray. **No** Windows portproxy or
|
||||
firewall mutation.
|
||||
|
||||
The endpoint resolver and validation runner do not enumerate WSL interface
|
||||
addresses, do not run `hostname -I` / `ip -4 addr` / `ip route` / `ss -ltnp`
|
||||
inside WSL, and do not promote between bind modes. There is one Windows-side
|
||||
TCP listener snapshot of port 18789 plus a loopback `/health` probe.
|
||||
|
||||
Off-box / LAN / phone reachability is out of scope for this PR and will be
|
||||
handled separately when relay ownership and protocol are clear.
|
||||
|
||||
### Lifecycle and service ownership
|
||||
|
||||
- The gateway is started/managed via upstream OpenClaw CLI commands invoked
|
||||
through `wsl.exe -d OpenClawGateway -u root -- ...`.
|
||||
- `loginctl enable-linger openclaw` plus a tray-owned WSL keepalive
|
||||
(`wsl.exe -d OpenClawGateway -u openclaw -- sleep 2147483647`) keep the
|
||||
instance reachable while local-gateway mode is active. Both patterns are
|
||||
acceptable per Craig.
|
||||
- Repair primitive: `wsl.exe --terminate OpenClawGateway`. Global
|
||||
`wsl --shutdown` is **never** issued.
|
||||
- Removal: `wsl.exe --unregister OpenClawGateway` only (after explicit user
|
||||
confirmation), preceded by `wsl.exe --terminate OpenClawGateway`. Cleanup
|
||||
also removes the install-location directory.
|
||||
|
||||
Product readiness for the gateway requires all of:
|
||||
|
||||
1. service start/restart command returns;
|
||||
2. WSL listener exists on `:18789`;
|
||||
3. Windows-side `http://localhost:18789/health` probe succeeds;
|
||||
4. gateway status / RPC succeeds with the device token;
|
||||
5. setup-code mint succeeds.
|
||||
|
||||
`systemctl is-active` reporting `active` is not, on its own, treated as readiness.
|
||||
|
||||
### Diagnostics
|
||||
|
||||
On any setup failure, the engine and validation script surface the link
|
||||
<https://aka.ms/wsllogs> for the user/maintainer to collect WSL logs. The
|
||||
product does **not** scrape WSL internal log files or invoke
|
||||
`wsl --shutdown` to collect them. The validation script's
|
||||
`Save-DiagnosticsSnapshot` records `wslLogsHelp = https://aka.ms/wsllogs` and
|
||||
`Write-Summary` appends a "Diagnostics: see https://aka.ms/wsllogs..." note
|
||||
to `summary.md` on failure.
|
||||
|
||||
### Host filesystem and file I/O
|
||||
|
||||
All WSL file operations from Windows go through `wsl.exe -d OpenClawGateway
|
||||
-- ...` subprocess calls. `\\wsl$` and `\\wsl.localhost` are forbidden in
|
||||
product code, validation scripts, tests, and ad-hoc PowerShell. The instance
|
||||
does not depend on any Windows drive mount after setup.
|
||||
|
||||
### Pairing and protocol boundary
|
||||
|
||||
OpenClaw pairing is implemented entirely through the upstream OpenClaw
|
||||
protocol. The tray never edits gateway pairing stores directly.
|
||||
|
||||
1. Gateway starts with local token auth from
|
||||
`/var/lib/openclaw/gateway.env`.
|
||||
2. Tray invokes `wsl.exe -d OpenClawGateway -- openclaw qr --json` and
|
||||
decodes the upstream setup-code payload (with short-lived bootstrap
|
||||
token).
|
||||
3. Tray (operator) connects over WebSocket using its Ed25519 device identity
|
||||
and `auth.bootstrapToken`; gateway returns `hello-ok.auth.deviceToken`,
|
||||
stored in `%APPDATA%\OpenClawTray\device-key-ed25519.json` (operator
|
||||
token field).
|
||||
4. Tray (node) opens a separate WebSocket session with role `node` and
|
||||
pairs through the same setup-code/bootstrap-token flow; the resulting
|
||||
device token is stored in the same identity file under the **node**
|
||||
field.
|
||||
5. Subsequent reconnects use `auth.deviceToken`. Node tokens are never
|
||||
reused as `auth.token` and vice versa.
|
||||
|
||||
Identity-path invariant: operator and node device tokens share
|
||||
`%APPDATA%\OpenClawTray\device-key-ed25519.json` (`OPENCLAW_TRAY_APPDATA_DIR`
|
||||
override honored), with role distinction inside the file. The
|
||||
prototype-era split between `%APPDATA%` (operator) and `%LOCALAPPDATA%`
|
||||
(node) was closed in Phase 4.
|
||||
|
||||
The Windows tray node parallels the macOS app's in-app node model
|
||||
(`MacNodeModeCoordinator` with role `node`, separate session, capabilities
|
||||
declared). No WSL-internal worker is paired by the Windows tray in this PR.
|
||||
|
||||
## Validation
|
||||
|
||||
`scripts/validate-wsl-gateway.ps1` provides four scenarios. Each writes a
|
||||
JSON+markdown summary under `artifacts/validate-wsl-gateway/<run-id>/`.
|
||||
|
||||
Validation AppData isolation uses this canonical contract:
|
||||
|
||||
- `OPENCLAW_TRAY_DATA_DIR` is the settings/logs/run-marker root consumed by
|
||||
`SettingsManager`, `App.DataPath`, `Logger`, and token path resolution.
|
||||
- `OPENCLAW_TRAY_APPDATA_DIR` is the roaming identity-store root consumed by
|
||||
`DeviceIdentity`/pairing paths. Validation sets it alongside
|
||||
`OPENCLAW_TRAY_DATA_DIR` for backward compatibility and identity isolation.
|
||||
- `OPENCLAW_TRAY_LOCALAPPDATA_DIR` is the local setup-state/WSL-install root.
|
||||
|
||||
| Scenario | What it does | When to use | Destructive |
|
||||
|---|---|---|---|
|
||||
| `PreflightOnly` | Repo-layout sanity, WSL host status (`wsl --status`, `wsl --list --verbose`), relay-prototype probe (NotAvailable when no probe URI). No build, no install, no WSL state mutation. | Cheap CI / local sanity check. Safe on dev box. | No |
|
||||
| `UpstreamInstall` | Build + tests, then drives the tray onboarding so the product itself runs the canonical `wsl --install Ubuntu-24.04 --name OpenClawGateway --location <path> --no-launch --version 2` path. Smoke + bootstrap-token + operator+node pairing proofs over loopback. Reuses an existing `OpenClawGateway` instance if present. | Lab / dedicated machine. End-to-end product path. | Reuses existing distro state |
|
||||
| `FreshMachine` | `UpstreamInstall` after a fresh-machine reset: `wsl --unregister OpenClawGateway` + AppData wipe (single shot). | Lab. Fresh install proof. | Yes, scoped to `OpenClawGateway` |
|
||||
| `Recreate` | Iterated `FreshMachine`. Supports `-Iterations`. Uses `wsl --unregister` only — **never** `wsl --shutdown`. | Lab / repeatability harness. | Yes, scoped to `OpenClawGateway` |
|
||||
|
||||
Scenarios deliberately removed from the prototype: `BuildRootfs`,
|
||||
`InstallOnly`, `Smoke`, `Full`, `Loop`. Parameters deliberately removed:
|
||||
`-BuildDevRootfs`, `-BaseRootfsPath`, `-GatewayPackagePath`,
|
||||
`-UseExistingManifest`, `-RootfsPath`, `-AllowUnsignedDevArtifact`,
|
||||
`-SigningKeyId`, `-PublicKeyPath`,
|
||||
`-AllowNonStandardDistroNameForDestructiveClean`, `-NetworkingMode`,
|
||||
`-LoopMode`, `-RequireWorkerPairing`, `-CleanOpenClawState`,
|
||||
`-GoSkillProofCommand`, `-RequireGoSkillProof`.
|
||||
|
||||
The validation script:
|
||||
|
||||
- Drives onboarding via the `SetupWarningPage` "Set up locally" button
|
||||
(`OnboardingSetupLocal` automation ID); `LocalSetupProgressPage` autostarts
|
||||
the engine on appearance.
|
||||
- Polls `setup-state.json` for `Complete` (terminal status). Worker / rootfs
|
||||
phases are gone; terminal status is `Complete` only.
|
||||
- Snapshots loopback diagnostics on failure (Windows-side `:18789` listener
|
||||
state; loopback `/health` probe). Does **not** run any networking probes
|
||||
inside WSL.
|
||||
- Redacts sensitive output: `Redact-SensitiveGatewayOutput` over
|
||||
`openclaw qr --json` stdout; `Save-RedactedSettings` strips `Token`,
|
||||
`GatewayToken`, `BootstrapToken`, `bootstrap_token`, `NodeToken`,
|
||||
`nodeToken`; relay probe body strips `token=...`.
|
||||
|
||||
Scope guarantees from the validation script:
|
||||
|
||||
- Only `OpenClawGateway` is ever the target of `wsl --unregister`.
|
||||
- Global `wsl --shutdown` is never issued.
|
||||
- No `\\wsl$` or `\\wsl.localhost` paths are read or written.
|
||||
|
||||
Companion script:
|
||||
`scripts/reset-openclaw-wsl-validation-state.ps1` — exact-target gated
|
||||
cleanup for `OpenClawGateway` plus the `%APPDATA%\OpenClawTray` and
|
||||
`%LOCALAPPDATA%\OpenClawTray` directories. Refuses to act on any other distro
|
||||
name.
|
||||
|
||||
## Outstanding follow-ups
|
||||
|
||||
Tracked but outside the scope of this PR:
|
||||
|
||||
- Off-box / LAN / phone reachability via OpenClaw relay (blocked on relay
|
||||
ownership / protocol clarity).
|
||||
- Optional `winget install Microsoft.WSL` as a **platform** repair fallback
|
||||
(deeper research in flight). Distro creation stays on `wsl --install`
|
||||
regardless.
|
||||
- Internationalization of the onboarding copy (`Onboarding_SetupWarning_*`
|
||||
/ `Onboarding_LocalSetupProgress_*` resw entries across the supported
|
||||
locales).
|
||||
|
||||
See `docs/wsl-owner-open-issues.md` for the structured Q&A explaining **why**
|
||||
this design is what it is, with Craig's verbatim answers.
|
||||
18
merge-analysis.txt
Normal file
18
merge-analysis.txt
Normal file
@ -0,0 +1,18 @@
|
||||
a256a33ad6fa5ebe49a7f6dca52b1992703db37c
|
||||
100644 98df1e8026dae834daa8d88010057cc8ce2e831b 1 src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
100644 0068d68df237a6f35f31590874551a46a62e1bbb 2 src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
100644 144e88d3a56273e113a28595804950286af3b053 3 src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
100644 de0780f9aa39a6eb8e70382cac0b4d4602e96f97 1 src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
100644 8d0dc13e7597948da2c20f94edb7fd15de0bd5d2 2 src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
100644 76ea7d737152707e43f65f0949d221d9f24aa9a5 3 src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
100644 8e9f269d393e734bbce5aa0b749b7391018cfc2e 1 tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
100644 97765fd9ff56d02861a932454ce31589187c56dd 2 tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
100644 4fdde3a5ab3bfed959edd3bfdb8a9b4969b3ce98 3 tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
|
||||
Auto-merging src/OpenClaw.Shared/WebSocketClientBase.cs
|
||||
Auto-merging src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
CONFLICT (content): Merge conflict in src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
Auto-merging src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
CONFLICT (content): Merge conflict in src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
Auto-merging tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
CONFLICT (content): Merge conflict in tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
14
moltbot-windows-hub.slnx
Normal file
14
moltbot-windows-hub.slnx
Normal file
@ -0,0 +1,14 @@
|
||||
<Solution>
|
||||
<Folder Name="/src/">
|
||||
<Project Path="src/OpenClaw.Cli/OpenClaw.Cli.csproj" />
|
||||
<Project Path="src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj">
|
||||
<Platform Project="x64" />
|
||||
</Project>
|
||||
<Project Path="src/OpenClaw.Shared/OpenClaw.Shared.csproj" />
|
||||
<Project Path="src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj" />
|
||||
</Folder>
|
||||
<Folder Name="/tests/">
|
||||
<Project Path="tests/OpenClaw.Shared.Tests/OpenClaw.Shared.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.Tests/OpenClaw.Tray.Tests.csproj" />
|
||||
</Folder>
|
||||
</Solution>
|
||||
@ -1,38 +0,0 @@
|
||||
<Solution>
|
||||
<Configurations>
|
||||
<Platform Name="Any CPU" />
|
||||
<Platform Name="x64" />
|
||||
<Platform Name="ARM64" />
|
||||
<BuildType Name="Debug" />
|
||||
<BuildType Name="Release" />
|
||||
</Configurations>
|
||||
<Folder Name="/src/">
|
||||
<Project Path="src/OpenClaw.Cli/OpenClaw.Cli.csproj" />
|
||||
<Project Path="src/OpenClaw.WinNode.Cli/OpenClaw.WinNode.Cli.csproj" />
|
||||
<Project Path="src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj">
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
<Project Path="src/OpenClaw.Shared/OpenClaw.Shared.csproj" />
|
||||
<Project Path="src/OpenClawTray.FunctionalUI/OpenClawTray.FunctionalUI.csproj" />
|
||||
<Project Path="src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj">
|
||||
<!-- WindowsAppSDK.SelfContained requires a concrete Platform (x64/ARM64); AnyCPU would need a RID. -->
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
<Folder Name="/tests/">
|
||||
<Project Path="tests/OpenClaw.Shared.Tests/OpenClaw.Shared.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.WinNode.Cli.Tests/OpenClaw.WinNode.Cli.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.Tests/OpenClaw.Tray.Tests.csproj" />
|
||||
<Project Path="tests/OpenClawTray.FunctionalUI.Tests/OpenClawTray.FunctionalUI.Tests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.IntegrationTests/OpenClaw.Tray.IntegrationTests.csproj" />
|
||||
<Project Path="tests/OpenClaw.Tray.UITests/OpenClaw.Tray.UITests.csproj">
|
||||
<Platform Solution="*|Any CPU" Project="x64" />
|
||||
<Platform Solution="*|x64" Project="x64" />
|
||||
<Platform Solution="*|ARM64" Project="ARM64" />
|
||||
</Project>
|
||||
</Folder>
|
||||
</Solution>
|
||||
3148
pr117_diff.txt
Normal file
3148
pr117_diff.txt
Normal file
File diff suppressed because it is too large
Load Diff
33200
pr120_full.diff
Normal file
33200
pr120_full.diff
Normal file
File diff suppressed because it is too large
Load Diff
607
pr80.diff
Normal file
607
pr80.diff
Normal file
@ -0,0 +1,607 @@
|
||||
diff --git a/src/OpenClaw.Shared/WebSocketClientBase.cs b/src/OpenClaw.Shared/WebSocketClientBase.cs
|
||||
index 72c4d10..ec850f3 100644
|
||||
--- a/src/OpenClaw.Shared/WebSocketClientBase.cs
|
||||
+++ b/src/OpenClaw.Shared/WebSocketClientBase.cs
|
||||
@@ -186,30 +186,45 @@ private async Task ListenForMessagesAsync()
|
||||
|
||||
protected async Task ReconnectWithBackoffAsync()
|
||||
{
|
||||
- var delay = BackoffMs[Math.Min(_reconnectAttempts, BackoffMs.Length - 1)];
|
||||
- _reconnectAttempts++;
|
||||
- _logger.Warn($"{ClientRole} reconnecting in {delay}ms (attempt {_reconnectAttempts})");
|
||||
- RaiseStatusChanged(ConnectionStatus.Connecting);
|
||||
+ var emittedConnecting = false;
|
||||
|
||||
- try
|
||||
+ while (!_disposed)
|
||||
{
|
||||
- await Task.Delay(delay, _cts.Token);
|
||||
+ var delay = BackoffMs[Math.Min(_reconnectAttempts, BackoffMs.Length - 1)];
|
||||
+ _reconnectAttempts++;
|
||||
+ _logger.Warn($"{ClientRole} reconnecting in {delay}ms (attempt {_reconnectAttempts})");
|
||||
+ if (!emittedConnecting)
|
||||
+ {
|
||||
+ RaiseStatusChanged(ConnectionStatus.Connecting);
|
||||
+ emittedConnecting = true;
|
||||
+ }
|
||||
|
||||
- // Check cancellation after delay
|
||||
- if (_cts.Token.IsCancellationRequested) return;
|
||||
+ try
|
||||
+ {
|
||||
+ await Task.Delay(delay, _cts.Token);
|
||||
|
||||
- // Safely dispose old socket
|
||||
- var oldSocket = _webSocket;
|
||||
- _webSocket = null;
|
||||
- try { oldSocket?.Dispose(); } catch { /* ignore dispose errors */ }
|
||||
+ if (_cts.Token.IsCancellationRequested) return;
|
||||
|
||||
- await ConnectAsync();
|
||||
- }
|
||||
- catch (OperationCanceledException) { }
|
||||
- catch (Exception ex)
|
||||
- {
|
||||
- _logger.Error($"{ClientRole} reconnect failed", ex);
|
||||
- RaiseStatusChanged(ConnectionStatus.Error);
|
||||
+ // Safely dispose old socket before retrying the connection.
|
||||
+ var oldSocket = _webSocket;
|
||||
+ _webSocket = null;
|
||||
+ try { oldSocket?.Dispose(); } catch { /* ignore dispose errors */ }
|
||||
+
|
||||
+ await ConnectAsync();
|
||||
+ if (IsConnected)
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+ catch (OperationCanceledException)
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+ catch (Exception ex)
|
||||
+ {
|
||||
+ _logger.Error($"{ClientRole} reconnect failed", ex);
|
||||
+ RaiseStatusChanged(ConnectionStatus.Error);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/src/OpenClaw.Shared/WindowsNodeClient.cs b/src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
index 98df1e8..144e88d 100644
|
||||
--- a/src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
+++ b/src/OpenClaw.Shared/WindowsNodeClient.cs
|
||||
@@ -25,6 +25,8 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
private string? _nodeId;
|
||||
private string? _pendingNonce; // Store nonce from challenge for signing
|
||||
private bool _isPendingApproval; // True when connected but awaiting pairing approval
|
||||
+ private bool _isPaired;
|
||||
+ private bool _pairingApprovedAwaitingReconnect; // True after approval event until the next successful reconnect
|
||||
|
||||
// Cached serialization/validation — reused on every message instead of allocating per-call
|
||||
private static readonly JsonSerializerOptions s_ignoreNullOptions = new()
|
||||
@@ -46,8 +48,8 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
/// <summary>True if connected but waiting for pairing approval on gateway</summary>
|
||||
public bool IsPendingApproval => _isPendingApproval;
|
||||
|
||||
- /// <summary>True if device is paired (has a device token)</summary>
|
||||
- public bool IsPaired => !string.IsNullOrEmpty(_deviceIdentity.DeviceToken);
|
||||
+ /// <summary>True if device is paired or approved for use by the gateway</summary>
|
||||
+ public bool IsPaired => _isPaired || !string.IsNullOrEmpty(_deviceIdentity.DeviceToken);
|
||||
|
||||
/// <summary>Device ID for display/approval (first 16 chars of full ID)</summary>
|
||||
public string ShortDeviceId => _deviceIdentity.DeviceId.Length > 16
|
||||
@@ -182,9 +184,93 @@ private async Task HandleEventAsync(JsonElement root)
|
||||
case "connect.challenge":
|
||||
await HandleConnectChallengeAsync(root);
|
||||
break;
|
||||
+ case "node.pair.requested":
|
||||
+ case "device.pair.requested":
|
||||
+ HandlePairingRequestedEvent(root, eventType);
|
||||
+ break;
|
||||
case "node.invoke.request":
|
||||
await HandleNodeInvokeEventAsync(root);
|
||||
break;
|
||||
+ case "node.pair.resolved":
|
||||
+ case "device.pair.resolved":
|
||||
+ await HandlePairingResolvedEventAsync(root, eventType);
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ private void HandlePairingRequestedEvent(JsonElement root, string? eventType)
|
||||
+ {
|
||||
+ if (!root.TryGetProperty("payload", out var payload))
|
||||
+ {
|
||||
+ _logger.Warn($"[NODE] {eventType} has no payload");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!PayloadTargetsCurrentDevice(payload))
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (_isPendingApproval)
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ _isPendingApproval = true;
|
||||
+ _isPaired = false;
|
||||
+ _pairingApprovedAwaitingReconnect = false;
|
||||
+ _logger.Info($"[NODE] Pairing request received for this device via {eventType}");
|
||||
+ _logger.Info($"To approve, run: openclaw devices approve {_deviceIdentity.DeviceId}");
|
||||
+ PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
+ PairingStatus.Pending,
|
||||
+ _deviceIdentity.DeviceId,
|
||||
+ $"Run: openclaw devices approve {ShortDeviceId}..."));
|
||||
+ }
|
||||
+
|
||||
+ private async Task HandlePairingResolvedEventAsync(JsonElement root, string? eventType)
|
||||
+ {
|
||||
+ if (!root.TryGetProperty("payload", out var payload))
|
||||
+ {
|
||||
+ _logger.Warn($"[NODE] {eventType} has no payload");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!PayloadTargetsCurrentDevice(payload))
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ var decision = payload.TryGetProperty("decision", out var decisionProp)
|
||||
+ ? decisionProp.GetString()
|
||||
+ : null;
|
||||
+
|
||||
+ _logger.Info($"[NODE] Pairing resolution received for this device: decision={decision ?? "unknown"}");
|
||||
+
|
||||
+ if (string.Equals(decision, "approved", StringComparison.OrdinalIgnoreCase))
|
||||
+ {
|
||||
+ _isPendingApproval = false;
|
||||
+ _isPaired = true;
|
||||
+ _pairingApprovedAwaitingReconnect = true;
|
||||
+ PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
+ PairingStatus.Paired,
|
||||
+ _deviceIdentity.DeviceId,
|
||||
+ "Pairing approved; reconnecting to refresh node state."));
|
||||
+
|
||||
+ // Force a fresh handshake so the approved connection can settle into its
|
||||
+ // steady-state paired behavior on the next reconnect.
|
||||
+ _logger.Info("[NODE] Closing socket after pairing approval to refresh node connection...");
|
||||
+ await CloseWebSocketAsync();
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (string.Equals(decision, "rejected", StringComparison.OrdinalIgnoreCase))
|
||||
+ {
|
||||
+ _isPendingApproval = false;
|
||||
+ _isPaired = false;
|
||||
+ PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
+ PairingStatus.Rejected,
|
||||
+ _deviceIdentity.DeviceId,
|
||||
+ "Pairing rejected"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -430,6 +516,12 @@ private void HandleResponse(JsonElement root)
|
||||
{
|
||||
// DEBUG: Log entire response structure
|
||||
_logger.Debug($"[NODE] HandleResponse - ok: {(root.TryGetProperty("ok", out var okVal) ? okVal.ToString() : "missing")}");
|
||||
+ if (root.TryGetProperty("ok", out var okProp) &&
|
||||
+ okProp.ValueKind == JsonValueKind.False)
|
||||
+ {
|
||||
+ HandleRequestError(root);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
if (!root.TryGetProperty("payload", out var payload))
|
||||
{
|
||||
@@ -442,6 +534,7 @@ private void HandleResponse(JsonElement root)
|
||||
// Handle hello-ok (successful registration)
|
||||
if (payload.TryGetProperty("type", out var t) && t.GetString() == "hello-ok")
|
||||
{
|
||||
+ var wasPairedBeforeHello = IsPaired;
|
||||
_isConnected = true;
|
||||
|
||||
// Extract node ID if returned
|
||||
@@ -450,77 +543,155 @@ private void HandleResponse(JsonElement root)
|
||||
_nodeId = nodeIdProp.GetString();
|
||||
}
|
||||
|
||||
+ bool receivedDeviceToken = false;
|
||||
+ bool hasAuthPayload = payload.TryGetProperty("auth", out var authPayload);
|
||||
+
|
||||
// Check for device token in auth (means we're paired!)
|
||||
- if (payload.TryGetProperty("auth", out var authPayload))
|
||||
+ if (hasAuthPayload && authPayload.TryGetProperty("deviceToken", out var deviceTokenProp))
|
||||
{
|
||||
- if (authPayload.TryGetProperty("deviceToken", out var deviceTokenProp))
|
||||
+ var deviceToken = deviceTokenProp.GetString();
|
||||
+ if (!string.IsNullOrEmpty(deviceToken))
|
||||
{
|
||||
- var deviceToken = deviceTokenProp.GetString();
|
||||
- if (!string.IsNullOrEmpty(deviceToken))
|
||||
- {
|
||||
- var wasWaiting = _isPendingApproval;
|
||||
- _isPendingApproval = false;
|
||||
- _logger.Info("Received device token - we are now paired!");
|
||||
- _deviceIdentity.StoreDeviceToken(deviceToken);
|
||||
-
|
||||
- // Fire pairing event if we were waiting
|
||||
- if (wasWaiting)
|
||||
- {
|
||||
- PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
- PairingStatus.Paired,
|
||||
- _deviceIdentity.DeviceId,
|
||||
- "Pairing approved!"));
|
||||
- }
|
||||
- }
|
||||
+ receivedDeviceToken = true;
|
||||
+ _isPendingApproval = false;
|
||||
+ _isPaired = true;
|
||||
+ _pairingApprovedAwaitingReconnect = false;
|
||||
+ _logger.Info("Received device token in hello-ok - we are now paired!");
|
||||
+ _deviceIdentity.StoreDeviceToken(deviceToken);
|
||||
}
|
||||
}
|
||||
-
|
||||
+ else if (_pairingApprovedAwaitingReconnect)
|
||||
+ {
|
||||
+ _logger.Info("hello-ok arrived after pairing approval without auth.deviceToken; keeping local state paired.");
|
||||
+ _pairingApprovedAwaitingReconnect = false;
|
||||
+ }
|
||||
+
|
||||
_logger.Info($"Node registered successfully! ID: {_nodeId ?? _deviceIdentity.DeviceId.Substring(0, 16)}");
|
||||
+ _logger.Info($"[NODE] hello-ok auth present={hasAuthPayload}, receivedDeviceToken={receivedDeviceToken}, storedDeviceToken={!string.IsNullOrEmpty(_deviceIdentity.DeviceToken)}, pendingApproval={_isPendingApproval}, awaitingReconnect={_pairingApprovedAwaitingReconnect}");
|
||||
|
||||
- // Pairing happens at connect time via device identity, no separate request needed
|
||||
- if (string.IsNullOrEmpty(_deviceIdentity.DeviceToken))
|
||||
+ // Current gateways only send hello-ok for approved/accepted nodes, even when
|
||||
+ // auth.deviceToken is omitted, so treat handshake acceptance as paired state.
|
||||
+ _isPendingApproval = false;
|
||||
+ _isPaired = true;
|
||||
+ _logger.Info(string.IsNullOrEmpty(_deviceIdentity.DeviceToken)
|
||||
+ ? "Gateway accepted the node without returning a device token; treating this device as paired"
|
||||
+ : "Already paired with stored device token");
|
||||
+ if (!wasPairedBeforeHello)
|
||||
{
|
||||
- _isPendingApproval = true;
|
||||
- _logger.Info("Not yet paired - check 'openclaw devices list' for pending approval");
|
||||
- _logger.Info($"To approve, run: openclaw devices approve {_deviceIdentity.DeviceId}");
|
||||
+ var pairingMessage = receivedDeviceToken
|
||||
+ ? "Pairing approved!"
|
||||
+ : "Node registration accepted";
|
||||
+
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
- PairingStatus.Pending,
|
||||
+ PairingStatus.Paired,
|
||||
_deviceIdentity.DeviceId,
|
||||
- $"Run: openclaw devices approve {ShortDeviceId}..."));
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- _isPendingApproval = false;
|
||||
- _logger.Info("Already paired with stored device token");
|
||||
- PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
- PairingStatus.Paired,
|
||||
- _deviceIdentity.DeviceId));
|
||||
+ pairingMessage));
|
||||
}
|
||||
|
||||
RaiseStatusChanged(ConnectionStatus.Connected);
|
||||
+ return;
|
||||
}
|
||||
|
||||
- // Handle errors
|
||||
- if (root.TryGetProperty("ok", out var okProp) && !okProp.GetBoolean())
|
||||
+ _logger.Debug("[NODE] Unhandled response payload");
|
||||
+ }
|
||||
+
|
||||
+ private void HandleRequestError(JsonElement root)
|
||||
+ {
|
||||
+ var error = "Unknown error";
|
||||
+ var errorCode = "none";
|
||||
+ string? pairingReason = null;
|
||||
+ string? pairingRequestId = null;
|
||||
+ if (root.TryGetProperty("error", out var errorProp))
|
||||
{
|
||||
- var error = "Unknown error";
|
||||
- var errorCode = "none";
|
||||
- if (root.TryGetProperty("error", out var errorProp))
|
||||
+ if (errorProp.TryGetProperty("message", out var msgProp))
|
||||
+ {
|
||||
+ error = msgProp.GetString() ?? error;
|
||||
+ }
|
||||
+ if (errorProp.TryGetProperty("code", out var codeProp))
|
||||
{
|
||||
- if (errorProp.TryGetProperty("message", out var msgProp))
|
||||
+ errorCode = codeProp.ToString();
|
||||
+ }
|
||||
+ if (errorProp.TryGetProperty("details", out var detailsProp))
|
||||
+ {
|
||||
+ if (detailsProp.TryGetProperty("reason", out var reasonProp))
|
||||
{
|
||||
- error = msgProp.GetString() ?? error;
|
||||
+ pairingReason = reasonProp.GetString();
|
||||
}
|
||||
- if (errorProp.TryGetProperty("code", out var codeProp))
|
||||
+ if (detailsProp.TryGetProperty("requestId", out var requestIdProp))
|
||||
{
|
||||
- errorCode = codeProp.ToString();
|
||||
+ pairingRequestId = requestIdProp.GetString();
|
||||
}
|
||||
}
|
||||
- _logger.Error($"Node registration failed: {error} (code: {errorCode})");
|
||||
- RaiseStatusChanged(ConnectionStatus.Error);
|
||||
}
|
||||
+
|
||||
+ if (string.Equals(errorCode, "NOT_PAIRED", StringComparison.OrdinalIgnoreCase))
|
||||
+ {
|
||||
+ if (_isPendingApproval)
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ _isPendingApproval = true;
|
||||
+ _isPaired = false;
|
||||
+ _pairingApprovedAwaitingReconnect = false;
|
||||
+
|
||||
+ var detail = $"Device {ShortDeviceId} requires approval";
|
||||
+ if (!string.IsNullOrWhiteSpace(pairingRequestId))
|
||||
+ {
|
||||
+ detail += $" (request {pairingRequestId})";
|
||||
+ }
|
||||
+
|
||||
+ _logger.Info($"[NODE] Pairing required for this device; waiting for gateway approval. reason={pairingReason ?? "unknown"}, requestId={pairingRequestId ?? "none"}");
|
||||
+ PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
+ PairingStatus.Pending,
|
||||
+ _deviceIdentity.DeviceId,
|
||||
+ detail));
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ _logger.Error($"Node registration failed: {error} (code: {errorCode})");
|
||||
+ RaiseStatusChanged(ConnectionStatus.Error);
|
||||
}
|
||||
-
|
||||
+
|
||||
+ private bool PayloadTargetsCurrentDevice(JsonElement payload)
|
||||
+ {
|
||||
+ if (TryGetString(payload, "deviceId", out var deviceId) &&
|
||||
+ string.Equals(deviceId, _deviceIdentity.DeviceId, StringComparison.OrdinalIgnoreCase))
|
||||
+ {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (TryGetString(payload, "nodeId", out var nodeId))
|
||||
+ {
|
||||
+ if (!string.IsNullOrEmpty(_nodeId))
|
||||
+ {
|
||||
+ return string.Equals(nodeId, _nodeId, StringComparison.OrdinalIgnoreCase);
|
||||
+ }
|
||||
+
|
||||
+ return string.Equals(nodeId, _deviceIdentity.DeviceId, StringComparison.OrdinalIgnoreCase);
|
||||
+ }
|
||||
+
|
||||
+ if (TryGetString(payload, "instanceId", out var instanceId) &&
|
||||
+ string.Equals(instanceId, _deviceIdentity.DeviceId, StringComparison.OrdinalIgnoreCase))
|
||||
+ {
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ private static bool TryGetString(JsonElement element, string propertyName, out string? value)
|
||||
+ {
|
||||
+ value = null;
|
||||
+ if (!element.TryGetProperty(propertyName, out var prop))
|
||||
+ {
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ value = prop.GetString();
|
||||
+ return !string.IsNullOrEmpty(value);
|
||||
+ }
|
||||
+
|
||||
private async Task HandleRequestAsync(JsonElement root)
|
||||
{
|
||||
if (!root.TryGetProperty("method", out var methodProp)) return;
|
||||
diff --git a/src/OpenClaw.Tray.WinUI/App.xaml.cs b/src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
index de0780f..76ea7d7 100644
|
||||
--- a/src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
+++ b/src/OpenClaw.Tray.WinUI/App.xaml.cs
|
||||
@@ -54,6 +54,7 @@ public partial class App : Application
|
||||
private GatewayCostUsageInfo? _lastUsageCost;
|
||||
private DateTime _lastCheckTime = DateTime.Now;
|
||||
private DateTime _lastUsageActivityLogUtc = DateTime.MinValue;
|
||||
+ private OpenClaw.Shared.PairingStatus? _lastNodePairingStatus;
|
||||
|
||||
// Session-aware activity tracking
|
||||
private readonly Dictionary<string, AgentActivity> _sessionActivities = new();
|
||||
@@ -1151,43 +1152,43 @@ private void OnNodeStatusChanged(object? sender, ConnectionStatus status)
|
||||
_currentStatus = status;
|
||||
UpdateTrayIcon();
|
||||
}
|
||||
-
|
||||
- // Don't show "connected" toast if waiting for pairing - we'll show pairing status instead
|
||||
- if (status == ConnectionStatus.Connected && _nodeService?.IsPaired == true)
|
||||
- {
|
||||
- try
|
||||
- {
|
||||
- new ToastContentBuilder()
|
||||
- .AddText(LocalizationHelper.GetString("Toast_NodeModeActive"))
|
||||
- .AddText(LocalizationHelper.GetString("Toast_NodeModeActiveDetail"))
|
||||
- .Show();
|
||||
- }
|
||||
- catch { /* ignore */ }
|
||||
- }
|
||||
}
|
||||
|
||||
private void OnPairingStatusChanged(object? sender, OpenClaw.Shared.PairingStatusEventArgs args)
|
||||
{
|
||||
Logger.Info($"Pairing status: {args.Status}");
|
||||
+
|
||||
+ var previousStatus = _lastNodePairingStatus;
|
||||
+ _lastNodePairingStatus = args.Status;
|
||||
|
||||
try
|
||||
{
|
||||
if (args.Status == OpenClaw.Shared.PairingStatus.Pending)
|
||||
{
|
||||
- AddRecentActivity("Node pairing pending", category: "node", dashboardPath: "nodes", nodeId: args.DeviceId);
|
||||
- // Show toast with approval instructions
|
||||
- new ToastContentBuilder()
|
||||
- .AddText(LocalizationHelper.GetString("Toast_PairingPending"))
|
||||
- .AddText(string.Format(LocalizationHelper.GetString("Toast_PairingPendingDetail"), args.DeviceId.Substring(0, 16)))
|
||||
- .Show();
|
||||
+ if (previousStatus != OpenClaw.Shared.PairingStatus.Pending)
|
||||
+ {
|
||||
+ AddRecentActivity("Node pairing pending", category: "node", dashboardPath: "nodes", nodeId: args.DeviceId);
|
||||
+ new ToastContentBuilder()
|
||||
+ .AddText(LocalizationHelper.GetString("Toast_PairingPending"))
|
||||
+ .AddText(string.Format(LocalizationHelper.GetString("Toast_PairingPendingDetail"), args.DeviceId.Substring(0, 16)))
|
||||
+ .Show();
|
||||
+ }
|
||||
}
|
||||
else if (args.Status == OpenClaw.Shared.PairingStatus.Paired)
|
||||
{
|
||||
- AddRecentActivity("Node paired", category: "node", dashboardPath: "nodes", nodeId: args.DeviceId);
|
||||
- new ToastContentBuilder()
|
||||
- .AddText(LocalizationHelper.GetString("Toast_NodePaired"))
|
||||
- .AddText(LocalizationHelper.GetString("Toast_NodePairedDetail"))
|
||||
- .Show();
|
||||
+ if (previousStatus != OpenClaw.Shared.PairingStatus.Paired)
|
||||
+ {
|
||||
+ AddRecentActivity("Node paired", category: "node", dashboardPath: "nodes", nodeId: args.DeviceId);
|
||||
+ new ToastContentBuilder()
|
||||
+ .AddText(LocalizationHelper.GetString("Toast_NodePaired"))
|
||||
+ .AddText(LocalizationHelper.GetString("Toast_NodePairedDetail"))
|
||||
+ .Show();
|
||||
+ }
|
||||
+ }
|
||||
+ else if (args.Status == OpenClaw.Shared.PairingStatus.Unknown)
|
||||
+ {
|
||||
+ AddRecentActivity("Node pairing requires repair", category: "node", dashboardPath: "nodes", nodeId: args.DeviceId);
|
||||
+ Logger.Warn($"Node pairing state is unknown for {args.DeviceId.Substring(0, 16)}. Repair the device token from the gateway or CLI, then reconnect.");
|
||||
}
|
||||
}
|
||||
catch { /* ignore */ }
|
||||
@@ -1608,6 +1609,7 @@ private void OnSettingsSaved(object? sender, EventArgs e)
|
||||
_gatewayClient?.Dispose();
|
||||
var oldNodeService = _nodeService;
|
||||
_nodeService = null;
|
||||
+ _lastNodePairingStatus = null;
|
||||
try { oldNodeService?.Dispose(); } catch (Exception ex) { Logger.Warn($"Node dispose error: {ex.Message}"); }
|
||||
|
||||
if (_settings?.EnableNodeMode == true)
|
||||
diff --git a/tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs b/tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
index 8e9f269..4fdde3a 100644
|
||||
--- a/tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
+++ b/tests/OpenClaw.Shared.Tests/WindowsNodeClientTests.cs
|
||||
@@ -1,5 +1,7 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
+using System.Reflection;
|
||||
+using System.Text.Json;
|
||||
using OpenClaw.Shared;
|
||||
using Xunit;
|
||||
|
||||
@@ -34,4 +36,95 @@ public void Constructor_NormalizesGatewayUrl(string inputUrl, string expectedUrl
|
||||
}
|
||||
}
|
||||
}
|
||||
+
|
||||
/// <summary>
/// Feeds a successful <c>hello-ok</c> response that carries no device token to a client
/// whose <c>_isPaired</c> and <c>_pairingApprovedAwaitingReconnect</c> flags are both set,
/// and expects the awaiting-reconnect flag to be cleared afterwards.
/// </summary>
[Fact]
public void HandleResponse_HelloOkWithoutDeviceTokenAfterApproval_ClearsAwaitingReconnect()
{
    string tempDataDir = Path.Combine(Path.GetTempPath(), $"openclaw-node-test-{Guid.NewGuid():N}");
    Directory.CreateDirectory(tempDataDir);

    try
    {
        using var nodeClient = new WindowsNodeClient("ws://localhost:18789", "test-token", tempDataDir);

        // Arrange: force the private state flags directly; no real connection is made.
        SetPrivateField(nodeClient, "_isPaired", true);
        SetPrivateField(nodeClient, "_pairingApprovedAwaitingReconnect", true);

        // Act: the payload has a nodeId but no device token.
        InvokeHandleResponse(nodeClient, """
            {
              "type": "res",
              "ok": true,
              "payload": {
                "type": "hello-ok",
                "nodeId": "node-123"
              }
            }
            """);

        // Assert
        object? awaitingReconnect = GetPrivateField(nodeClient, "_pairingApprovedAwaitingReconnect");
        Assert.False((bool)awaitingReconnect!);
    }
    finally
    {
        // Always remove the per-test scratch directory.
        if (Directory.Exists(tempDataDir))
        {
            Directory.Delete(tempDataDir, true);
        }
    }
}
|
||||
+
|
||||
/// <summary>
/// Feeds a successful <c>hello-ok</c> response without a device token to a freshly
/// constructed client and expects a <c>PairingStatusChanged</c> event reporting
/// <c>PairingStatus.Paired</c> with the message "Node registration accepted".
/// </summary>
[Fact]
public void HandleResponse_HelloOkWithoutDeviceTokenWhenUnpaired_EmitsNeutralPairedMessage()
{
    string tempDataDir = Path.Combine(Path.GetTempPath(), $"openclaw-node-test-{Guid.NewGuid():N}");
    Directory.CreateDirectory(tempDataDir);

    try
    {
        using var nodeClient = new WindowsNodeClient("ws://localhost:18789", "test-token", tempDataDir);

        // Arrange: capture the most recent pairing event raised by the client.
        PairingStatusEventArgs? observed = null;
        nodeClient.PairingStatusChanged += (_, e) => observed = e;

        // Act: the payload has a nodeId but no device token.
        InvokeHandleResponse(nodeClient, """
            {
              "type": "res",
              "ok": true,
              "payload": {
                "type": "hello-ok",
                "nodeId": "node-123"
              }
            }
            """);

        // Assert
        Assert.NotNull(observed);
        Assert.Equal(PairingStatus.Paired, observed!.Status);
        Assert.Equal("Node registration accepted", observed.Message);
    }
    finally
    {
        // Always remove the per-test scratch directory.
        if (Directory.Exists(tempDataDir))
        {
            Directory.Delete(tempDataDir, true);
        }
    }
}
|
||||
+
|
||||
/// <summary>
/// Parses <paramref name="json"/> and passes its root element to the client's
/// non-public instance method <c>HandleResponse</c> via reflection.
/// </summary>
/// <param name="client">Client whose handler is invoked.</param>
/// <param name="json">JSON text of the response message.</param>
private static void InvokeHandleResponse(WindowsNodeClient client, string json)
{
    using JsonDocument parsed = JsonDocument.Parse(json);

    const BindingFlags nonPublicInstance = BindingFlags.NonPublic | BindingFlags.Instance;
    MethodInfo? handler = typeof(WindowsNodeClient).GetMethod("HandleResponse", nonPublicInstance);

    // Clone so the element handed to the handler does not depend on the disposable document.
    JsonElement detachedRoot = parsed.RootElement.Clone();
    handler!.Invoke(client, new object[] { detachedRoot });
}
|
||||
+
|
||||
/// <summary>
/// Writes <paramref name="value"/> into the non-public instance field named
/// <paramref name="fieldName"/> on <paramref name="instance"/> using reflection.
/// </summary>
/// <param name="instance">Object whose field is overwritten.</param>
/// <param name="fieldName">Exact name of the non-public instance field.</param>
/// <param name="value">Value to store.</param>
private static void SetPrivateField(object instance, string fieldName, object value)
{
    const BindingFlags nonPublicInstance = BindingFlags.NonPublic | BindingFlags.Instance;

    FieldInfo? target = instance.GetType().GetField(fieldName, nonPublicInstance);
    target!.SetValue(instance, value);
}
|
||||
+
|
||||
/// <summary>
/// Reads the non-public instance field named <paramref name="fieldName"/> from
/// <paramref name="instance"/> using reflection.
/// </summary>
/// <param name="instance">Object to read from.</param>
/// <param name="fieldName">Exact name of the non-public instance field.</param>
/// <returns>The current field value (boxed when it is a value type).</returns>
private static object? GetPrivateField(object instance, string fieldName)
{
    const BindingFlags nonPublicInstance = BindingFlags.NonPublic | BindingFlags.Instance;

    FieldInfo? source = instance.GetType().GetField(fieldName, nonPublicInstance);
    return source!.GetValue(instance);
}
|
||||
}
|
||||
@ -1,326 +0,0 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Dev-loop helper: kill → backup/wipe state → optionally wipe WSL distro → build x64 → (optionally) launch tray.
|
||||
|
||||
.DESCRIPTION
|
||||
Consolidates the full dev-reset cycle used during OpenClaw tray development.
|
||||
Idempotent: no error if nothing is running, state dirs are absent, or the WSL
|
||||
distro is not registered.
|
||||
|
||||
Process kills are always by PID (Stop-Process -Id). Name-based kills are
|
||||
forbidden in this repo.
|
||||
|
||||
WSL file operations use 'wsl bash -c' — never \\wsl$\ paths (which trigger
|
||||
Windows permission prompts via the 9P protocol).
|
||||
|
||||
.PARAMETER WipeWslDistro
|
||||
Also unregister the OpenClawGateway WSL distro (wsl --unregister).
|
||||
Default: off (preserve the distro).
|
||||
|
||||
.PARAMETER CaptureDir
|
||||
If set, exports OPENCLAW_VISUAL_TEST=1 and OPENCLAW_VISUAL_TEST_DIR=<path>
|
||||
before launching the tray so the app auto-captures screenshots.
|
||||
|
||||
.PARAMETER SkipBuild
|
||||
Skip the 'dotnet build' step. Useful when you have just built.
|
||||
|
||||
.PARAMETER DontLaunch
|
||||
Reset and (optionally) build, but do not launch the tray.
|
||||
|
||||
.PARAMETER WorktreePath
|
||||
Root of the git worktree to operate in.
|
||||
Default: result of 'git rev-parse --show-toplevel' in the current directory.
|
||||
|
||||
.PARAMETER NoBackup
|
||||
Instead of backing up state dirs to TEMP, delete them directly.
|
||||
Faster, but no rollback.
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1
|
||||
Standard reset + rebuild + launch (no WSL wipe, no capture).
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -WipeWslDistro
|
||||
Full clean slate: also unregister the OpenClawGateway WSL distro.
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -DontLaunch
|
||||
Reset + build only (useful before testing manually).
|
||||
|
||||
.EXAMPLE
|
||||
.\scripts\dev-reset-rebuild-launch.ps1 -CaptureDir .\visual-test-output\my-test
|
||||
Reset + build + launch with OPENCLAW_VISUAL_TEST capture enabled.
|
||||
#>
|
||||
|
||||
[CmdletBinding(SupportsShouldProcess)]
param(
    [switch]$WipeWslDistro,
    [string]$CaptureDir = "",
    [switch]$SkipBuild,
    [switch]$DontLaunch,
    [string]$WorktreePath = "",
    [switch]$NoBackup
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# --- Resolve worktree path ---------------------------------------------------
# When -WorktreePath is not supplied, ask git for the top-level directory of the
# repository that contains the current directory.
if ([string]::IsNullOrWhiteSpace($WorktreePath)) {
    $repoTop = & git rev-parse --show-toplevel 2>$null
    $gitFailed = ($LASTEXITCODE -ne 0) -or [string]::IsNullOrWhiteSpace($repoTop)
    if ($gitFailed) {
        Write-Error "Cannot resolve worktree path: not inside a git repository and -WorktreePath was not supplied."
        exit 1
    }
    $WorktreePath = $repoTop.Trim()
}
# Normalize to a fully resolved path (fails if the path does not exist).
$WorktreePath = (Resolve-Path -LiteralPath $WorktreePath).Path

# --- Constants ---------------------------------------------------------------
$DistroName = "OpenClawGateway"
$TrayProject = Join-Path $WorktreePath "src\OpenClaw.Tray.WinUI\OpenClaw.Tray.WinUI.csproj"
$AppDataDir = Join-Path $env:APPDATA "OpenClawTray"
$LocalAppDataDir = Join-Path $env:LOCALAPPDATA "OpenClawTray"
$timestamp = (Get-Date).ToString("yyyy-MM-ddTHH-mm-ss")
# Backups for this run are grouped under one timestamped folder in TEMP.
$BackupRoot = Join-Path $env:TEMP "openclaw-test-backup-$timestamp"

# --- Summary state -----------------------------------------------------------
# Filled in by the steps below and printed at the end of the script.
$summary = [ordered]@{
    backupPath  = $null
    distroState = "not-checked"
    buildResult = "skipped"
    launchPid   = $null
}
|
||||
|
||||
# ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
# Writes one status line to the host: a space, the icon, a space, then the message.
function Write-Step {
    param(
        [string]$Icon,
        [string]$Message
    )

    Write-Host " $Icon $Message"
}

# Success line (check-mark icon).
function Write-OK {
    param([string]$m)
    Write-Step -Icon "✓" -Message $m
}

# Skipped / nothing-to-do line (dash icon).
function Write-Skip {
    param([string]$m)
    Write-Step -Icon "-" -Message $m
}

# Failure line (x icon).
function Write-Fail {
    param([string]$m)
    Write-Step -Icon "x" -Message $m
}
|
||||
|
||||
# Returns every running process whose ProcessName matches "OpenClaw*" (possibly none).
# Errors from Get-Process are suppressed.
function Get-OpenClawProcesses {
    $matching = Get-Process -ErrorAction SilentlyContinue |
        Where-Object { $_.ProcessName -like "OpenClaw*" }

    @($matching)
}
|
||||
|
||||
# Lists the names printed by 'wsl.exe --list --quiet'.
# Returns an empty array when wsl.exe exits non-zero or prints nothing.
function Get-WslDistros {
    $rawLines = & wsl.exe --list --quiet 2>$null

    $listingFailed = ($LASTEXITCODE -ne 0) -or ($null -eq $rawLines)
    if ($listingFailed) {
        return @()
    }

    # Strip embedded NUL characters and surrounding whitespace, then drop empty lines.
    @(
        $rawLines |
            ForEach-Object { ($_ -replace "`0", "").Trim() } |
            Where-Object { $_ }
    )
}
|
||||
|
||||
# ─── Banner ───────────────────────────────────────────────────────────────────
|
||||
|
||||
# Print the run banner: timestamp, worktree and the effective switch values.
$captureLabel = if ($CaptureDir) { $CaptureDir } else { '(none)' }
$bannerRule = "============================================================"

$bannerLines = @(
    ""
    $bannerRule
    " OpenClaw Dev Loop -- Reset / Rebuild / Launch"
    $bannerRule
    " Timestamp : $timestamp"
    " WorktreePath : $WorktreePath"
    " WipeWslDistro: $WipeWslDistro SkipBuild: $SkipBuild DontLaunch: $DontLaunch"
    " NoBackup : $NoBackup CaptureDir: $captureLabel"
)
foreach ($bannerLine in $bannerLines) {
    Write-Host $bannerLine
}

# -WhatIf was passed: make it obvious that nothing will be changed.
if ($WhatIfPreference) {
    Write-Host " *** WHATIF MODE -- no state will be changed ***"
}
Write-Host ""
|
||||
|
||||
# =============================================================================
|
||||
# STEP 1 -- Kill OpenClaw* processes (by PID; name-based kills are forbidden)
|
||||
# =============================================================================
|
||||
|
||||
Write-Host "STEP 1: Kill OpenClaw* processes"
$runningProcs = @(Get-OpenClawProcesses)

if ($runningProcs.Count -gt 0) {
    foreach ($proc in $runningProcs) {
        $procLabel = "PID $($proc.Id) ($($proc.ProcessName))"

        # ShouldProcess returns $false under -WhatIf, so only report what would happen.
        if (-not $PSCmdlet.ShouldProcess($procLabel, "Stop-Process -Id")) {
            Write-Skip "WhatIf: would stop $procLabel"
            continue
        }

        try {
            # Always stop by PID, never by name.
            Stop-Process -Id $proc.Id -Force
            Write-OK "Stopped $procLabel"
        }
        catch {
            Write-Fail "Failed to stop PID $($proc.Id): $_"
            exit 1
        }
    }

    if (-not $WhatIfPreference) {
        Start-Sleep -Milliseconds 500 # brief pause for file-lock release
    }
}
else {
    Write-Skip "No OpenClaw* processes running"
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 2 -- Backup or wipe tray state dirs
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 2: $(if ($NoBackup) { 'Wipe' } else { 'Backup' }) tray state dirs"

# Resets one tray state directory.
#   -Path  : directory to reset.
#   -Label : name used in messages and as the sub-folder name under $BackupRoot.
# With -NoBackup the directory is deleted outright; otherwise it is copied to
# $BackupRoot\<Label> first and then removed. Does nothing if the directory is absent.
function Invoke-StateDirReset {
    param(
        [string]$Path,
        [string]$Label
    )

    if (-not (Test-Path -LiteralPath $Path)) {
        Write-Skip "$Label not present -- nothing to do"
        return
    }

    if ($NoBackup) {
        if (-not $PSCmdlet.ShouldProcess($Path, "Remove-Item -Recurse -Force")) {
            Write-Skip "WhatIf: would delete $Label ($Path)"
            return
        }

        Remove-Item -LiteralPath $Path -Recurse -Force
        Write-OK "Deleted $Label ($Path)"
        return
    }

    $dest = Join-Path $BackupRoot $Label
    if (-not $PSCmdlet.ShouldProcess($Path, "Copy-Item to backup then Remove-Item")) {
        Write-Skip "WhatIf: would backup $Label --> $dest, then remove source"
        $script:summary.backupPath = "(whatif) $BackupRoot"
        return
    }

    # Copy before removing so the source is only deleted once the copy has succeeded.
    New-Item -ItemType Directory -Force -Path $BackupRoot | Out-Null
    Copy-Item -LiteralPath $Path -Destination $dest -Recurse -Force
    Remove-Item -LiteralPath $Path -Recurse -Force
    Write-OK "Backed up $Label --> $dest"
    $script:summary.backupPath = $BackupRoot
}

Invoke-StateDirReset -Path $AppDataDir -Label "AppData_OpenClawTray"
Invoke-StateDirReset -Path $LocalAppDataDir -Label "LocalAppData_OpenClawTray"
|
||||
|
||||
# =============================================================================
|
||||
# STEP 3 -- Optionally wipe the WSL distro
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 3: WSL distro ($DistroName)"

$registeredDistros = @(Get-WslDistros)
$isRegistered = $registeredDistros -contains $DistroName

if (-not $WipeWslDistro) {
    # Default: leave the distro alone and only record whether it exists.
    Write-Skip "-WipeWslDistro not set -- preserving $DistroName"
    $summary.distroState = if ($isRegistered) { "preserved" } else { "absent" }
}
elseif (-not $isRegistered) {
    Write-Skip "$DistroName is not registered -- nothing to unregister"
    $summary.distroState = "absent"
}
elseif ($PSCmdlet.ShouldProcess($DistroName, "wsl --terminate then wsl --unregister")) {
    & wsl.exe --terminate $DistroName 2>$null # ignore exit code -- distro may already be stopped
    & wsl.exe --unregister $DistroName
    if ($LASTEXITCODE -ne 0) {
        Write-Fail "wsl --unregister $DistroName failed (exit $LASTEXITCODE)"
        exit 1
    }
    Write-OK "Unregistered WSL distro $DistroName"
    $summary.distroState = "unregistered"
}
else {
    Write-Skip "WhatIf: would terminate + unregister WSL distro $DistroName"
    $summary.distroState = "(whatif) would-unregister"
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 4 -- Build x64 tray
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 4: Build x64 tray"

# Human-readable form of the build command, used in verbose / WhatIf messages.
$buildCommandText = "dotnet build `"$TrayProject`" -p:Platform=x64 --no-restore -v q"

if ($SkipBuild) {
    Write-Skip "-SkipBuild set -- skipping dotnet build"
    $summary.buildResult = "skipped"
}
elseif (-not (Test-Path -LiteralPath $TrayProject)) {
    Write-Fail "Tray project not found: $TrayProject"
    exit 1
}
elseif ($PSCmdlet.ShouldProcess($TrayProject, "dotnet build -p:Platform=x64 --no-restore -v q")) {
    Write-Verbose "Running: $buildCommandText"
    & dotnet build $TrayProject -p:Platform=x64 --no-restore -v q

    if ($LASTEXITCODE -ne 0) {
        Write-Fail "dotnet build failed (exit $LASTEXITCODE)"
        $summary.buildResult = "failed"
        exit 1
    }

    Write-OK "Build succeeded"
    $summary.buildResult = "succeeded"
}
else {
    Write-Skip "WhatIf: would run: $buildCommandText"
    $summary.buildResult = "(whatif) would-build"
}
|
||||
|
||||
# =============================================================================
|
||||
# STEP 5 -- Launch tray
|
||||
# =============================================================================
|
||||
|
||||
Write-Host ""
Write-Host "STEP 5: Launch tray"

if ($DontLaunch) {
    Write-Skip "-DontLaunch set -- not launching"
}
elseif ($PSCmdlet.ShouldProcess($TrayProject, "dotnet run -p:Platform=x64")) {
    if ($CaptureDir) {
        # A relative -CaptureDir is interpreted relative to the worktree root.
        $captureAbs = if ([System.IO.Path]::IsPathRooted($CaptureDir)) {
            $CaptureDir
        }
        else {
            Join-Path $WorktreePath $CaptureDir
        }

        # Set in this session's environment so the launched process inherits them.
        $env:OPENCLAW_VISUAL_TEST = "1"
        $env:OPENCLAW_VISUAL_TEST_DIR = $captureAbs
        Write-Verbose "Set OPENCLAW_VISUAL_TEST=1 OPENCLAW_VISUAL_TEST_DIR=$captureAbs"
    }

    Write-Verbose "Launching: dotnet run --project `"$TrayProject`" -p:Platform=x64"

    # Start-Process joins the -ArgumentList entries with spaces and does not quote them.
    # A project path containing spaces (e.g. under "C:\Users\Jane Doe\...") would be split
    # into several arguments, so embed the double quotes explicitly.
    $quotedTrayProject = "`"$TrayProject`""
    $launchProc = Start-Process -FilePath "dotnet" `
        -ArgumentList "run", "--project", $quotedTrayProject, "-p:Platform=x64" `
        -PassThru -WorkingDirectory $WorktreePath

    $summary.launchPid = $launchProc.Id
    Write-OK "Tray launched (PID $($launchProc.Id))"
}
else {
    Write-Skip "WhatIf: would launch: dotnet run --project `"$TrayProject`" -p:Platform=x64"
    if ($CaptureDir) {
        Write-Skip "WhatIf: would also set OPENCLAW_VISUAL_TEST=1 and OPENCLAW_VISUAL_TEST_DIR=$CaptureDir"
    }
}
|
||||
|
||||
# =============================================================================
|
||||
# Summary
|
||||
# =============================================================================
|
||||
|
||||
# Final report of what each step recorded in $summary.
$backupLabel = if ($summary.backupPath) {
    $summary.backupPath
}
elseif ($NoBackup) {
    '(deleted directly)'
}
else {
    '(nothing backed up)'
}
$launchLabel = if ($summary.launchPid) { $summary.launchPid } else { '(not launched)' }

Write-Host ""
Write-Host "---------------------------- Summary ----------------------------"
Write-Host " Backup path : $backupLabel"
Write-Host " Distro state : $($summary.distroState)"
Write-Host " Build result : $($summary.buildResult)"
Write-Host " Launch PID : $launchLabel"
Write-Host "-----------------------------------------------------------------"
Write-Host ""
|
||||
@ -1,388 +0,0 @@
|
||||
# reset-openclaw-wsl-validation-state.ps1
|
||||
#
|
||||
# Exact-target destructive cleanup for OpenClaw-owned WSL validation state.
|
||||
#
|
||||
# Safety guarantees enforced by this script:
|
||||
# 1. Without -ConfirmDestructiveClean, the script runs in DRY-RUN mode and
|
||||
# reports what it WOULD do; it never mutates state.
|
||||
# 2. The only WSL distro this script will ever touch is the production
|
||||
# constant "OpenClawGateway". Any other distro name is rejected.
|
||||
# 3. Destructive operations are preceded by a copy of the user's
|
||||
# %APPDATA%\OpenClawTray and %LOCALAPPDATA%\OpenClawTray identity
|
||||
# directories to a timestamped backup location (printed to console).
|
||||
# 4. The script never calls `wsl --shutdown`. It uses
|
||||
# `wsl --terminate OpenClawGateway` only.
|
||||
# 5. The script never reads or writes \\wsl$ / \\wsl.localhost paths.
|
||||
|
||||
[CmdletBinding()]
param(
    [string]$OutputDir = (Join-Path (Get-Location) "artifacts\wsl-gateway-validation\reset"),
    [string]$BackupRoot,
    [string]$AppDataRoot,
    [string]$LocalAppDataRoot,
    [string]$InstallLocation,
    [switch]$CleanInstallLocation,
    [switch]$ConfirmDestructiveClean,
    [switch]$KeepRunningProcesses,
    [switch]$PassThruJson
)

Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# The only WSL distro this script acts on. There is deliberately no parameter
# that overrides it; Assert-DestructiveTargetIsAllowed re-checks the value.
$script:OpenClawDistroName = "OpenClawGateway"

$startedAt = Get-Date
$timestamp = $startedAt.ToString("yyyyMMddHHmmss")

# Default backup location: a timestamped folder under the current directory.
if ([string]::IsNullOrWhiteSpace($BackupRoot)) {
    $BackupRoot = Join-Path (Get-Location) "artifacts\reset-backups\$timestamp"
}

# Structured run report; serialized to reset-summary.json at the end of the run.
# dryRun is true unless -ConfirmDestructiveClean was passed.
$destructiveConfirmed = [bool]$ConfirmDestructiveClean
$result = [ordered]@{
    script               = "reset-openclaw-wsl-validation-state"
    startedAt            = $startedAt.ToString("o")
    finishedAt           = $null
    outputDir            = $OutputDir
    backupRoot           = $BackupRoot
    distroName           = $script:OpenClawDistroName
    installLocation      = $InstallLocation
    appDataRoot          = $AppDataRoot
    localAppDataRoot     = $LocalAppDataRoot
    destructiveConfirmed = $destructiveConfirmed
    dryRun               = -not $destructiveConfirmed
    targets              = [ordered]@{}
    steps                = @()
}
|
||||
|
||||
# Appends one timestamped entry to $script:result.steps.
#   -Name    : step identifier.
#   -Status  : free-form status string (callers use e.g. "Completed", "Skipped", "DryRun").
#   -Message : human-readable description.
#   -Data    : optional extra key/value details (defaults to an empty hashtable).
function Add-ResetStep {
    param(
        [string]$Name,
        [string]$Status,
        [string]$Message,
        [hashtable]$Data = @{}
    )

    $entry = [ordered]@{
        name      = $Name
        status    = $Status
        message   = $Message
        data      = $Data
        timestamp = (Get-Date).ToString("o")
    }

    $script:result.steps += $entry
}
|
||||
|
||||
# Runs an external command and captures its output streams into files under
# "$OutputDir\commands", then records the outcome as a reset step.
#   -Name             : step name; also used (sanitized) as the capture file stem.
#   -FilePath         : command to invoke.
#   -ArgumentList     : arguments, splatted onto the command.
#   -WorkingDirectory : directory to run in (defaults to the current location).
#   -IgnoreExitCode   : when set, a non-zero exit code does not throw.
# Throws when the exit code is non-zero and -IgnoreExitCode is not set.
function Invoke-CapturedCommand {
    param(
        [string]$Name,
        [string]$FilePath,
        [string[]]$ArgumentList,
        [string]$WorkingDirectory = (Get-Location).Path,
        [switch]$IgnoreExitCode
    )

    $captureDir = Join-Path $OutputDir "commands"
    New-Item -ItemType Directory -Force -Path $captureDir | Out-Null

    # Replace anything outside [a-zA-Z0-9_.-] so the name is safe as a file name.
    $fileStem = $Name -replace "[^a-zA-Z0-9_.-]", "-"
    $stdout = Join-Path $captureDir "$fileStem.stdout.txt"
    $stderr = Join-Path $captureDir "$fileStem.stderr.txt"

    Push-Location $WorkingDirectory
    try {
        # NOTE(review): with $ErrorActionPreference = "Stop", redirecting a native
        # command's stderr may raise a terminating error on Windows PowerShell 5.1
        # when the command writes to stderr -- confirm on the target host.
        & $FilePath @ArgumentList > $stdout 2> $stderr

        # A null $LASTEXITCODE is treated as success.
        if ($null -eq $global:LASTEXITCODE) {
            $exitCode = 0
        }
        else {
            $exitCode = $global:LASTEXITCODE
        }
    }
    finally {
        Pop-Location
    }

    $stepData = @{
        file      = $FilePath
        arguments = ($ArgumentList -join " ")
        exitCode  = $exitCode
        stdout    = $stdout
        stderr    = $stderr
    }
    Add-ResetStep $Name "Completed" "Command completed with exit code $exitCode." $stepData

    $mustFail = ($exitCode -ne 0) -and (-not $IgnoreExitCode)
    if ($mustFail) {
        throw "$Name failed with exit code $exitCode. See $stdout and $stderr."
    }
}
|
||||
|
||||
# Copies a directory into $BackupRoot and then removes the original.
#   -Path  : directory to back up and remove.
#   -Label : prefix for the backup folder name and suffix of the recorded step name.
# Records a "Skipped" step when the directory does not exist and a "DryRun" step
# when $result.dryRun is set. In dry-run mode nothing on disk is touched: the backup
# root is only created on the destructive path, so dry-run honors the script's
# "never mutates state" guarantee.
function Backup-Directory {
    param(
        [string]$Path,
        [string]$Label
    )

    if (-not (Test-Path -LiteralPath $Path)) {
        Add-ResetStep "backup-$Label" "Skipped" "$Path does not exist."
        return
    }

    $leaf = Split-Path -Leaf $Path
    $destination = Join-Path $BackupRoot "$Label-$leaf"

    if ($result.dryRun) {
        Add-ResetStep "backup-$Label" "DryRun" "Would copy $Path to $destination, then remove the original." @{
            source      = $Path
            destination = $destination
        }
        return
    }

    # Destructive path only: make sure the backup root exists before copying.
    New-Item -ItemType Directory -Force -Path $BackupRoot | Out-Null

    # Do not overwrite an earlier backup of the same name; pick a timestamped name instead.
    if (Test-Path -LiteralPath $destination) {
        $destination = Join-Path $BackupRoot ("{0}-{1:yyyyMMddHHmmss}" -f "$Label-$leaf", (Get-Date))
    }

    # Copy first so the user can recover even if removal fails partway.
    Copy-Item -LiteralPath $Path -Destination $destination -Recurse -Force
    Remove-Item -LiteralPath $Path -Recurse -Force
    Add-ResetStep "backup-$Label" "Completed" "Backed up $Path to $destination, then removed the original." @{
        source      = $Path
        destination = $destination
    }
}
|
||||
|
||||
# Hard lock: throws unless $script:OpenClawDistroName equals "OpenClawGateway".
# No override flag exists, and the check applies in dry-run mode as well.
function Assert-DestructiveTargetIsAllowed {
    $resolvedName = $script:OpenClawDistroName

    if ($resolvedName -eq "OpenClawGateway") {
        return
    }

    throw "Refusing to run: distro name is locked to 'OpenClawGateway' but resolved to '$resolvedName'."
}
|
||||
|
||||
# Captures the TCP connections bound to local port 18789, writes them to
# "$OutputDir\port-18789-<Label>.json", records a reset step, and returns the entries.
#   -Label : suffix distinguishing snapshots (callers pass "before" / "after").
# Any failure of Get-NetTCPConnection is treated as "no owners".
# The JSON file always contains an array: "[]" when nothing owns the port and a
# one-element array for a single owner. Piping the collection into ConvertTo-Json
# instead would write nothing for an empty result and a bare object for one entry.
function Get-PortOwnerSnapshot {
    param([string]$Label)

    $port = 18789
    try {
        $connections = @(Get-NetTCPConnection -LocalPort $port -ErrorAction Stop)
        $snapshot = @($connections | ForEach-Object {
            [ordered]@{
                localAddress  = $_.LocalAddress
                localPort     = $_.LocalPort
                state         = $_.State.ToString()
                owningProcess = $_.OwningProcess
            }
        })
    }
    catch {
        # -ErrorAction Stop turns every lookup failure into an exception; report no owners.
        $snapshot = @()
    }

    $snapshotPath = Join-Path $OutputDir "port-18789-$Label.json"

    # -InputObject serializes the array as a whole, so the array shape is preserved.
    ConvertTo-Json -InputObject @($snapshot) -Depth 5 |
        Set-Content -LiteralPath $snapshotPath -Encoding UTF8

    Add-ResetStep "port-snapshot-$Label" "Completed" "Captured TCP listener snapshot for port 18789." @{
        path       = $snapshotPath
        ownerCount = @($snapshot).Count
    }
    return $snapshot
}
|
||||
|
||||
# Lists the names printed by 'wsl.exe --list --quiet'.
# Returns an empty array when wsl.exe exits non-zero or prints nothing.
function Get-WslDistros {
    $listing = & wsl.exe --list --quiet 2>$null

    $listingFailed = ($LASTEXITCODE -ne 0) -or ($null -eq $listing)
    if ($listingFailed) {
        return @()
    }

    # Strip embedded NUL characters and surrounding whitespace, then drop empty lines.
    $cleaned = $listing |
        ForEach-Object { ($_ -replace "`0", "").Trim() } |
        Where-Object { $_ }

    return @($cleaned)
}
|
||||
|
||||
# Returns every running process whose ProcessName matches "OpenClaw*" (possibly none).
function Get-OpenClawProcesses {
    $matching = Get-Process |
        Where-Object { $_.ProcessName -like "OpenClaw*" }

    return @($matching)
}
|
||||
|
||||
# Stores a description of everything this run may act on in $script:result.targets
# and records a "target-summary" step with the headline counts and flags.
#   -Processes           : process objects; Id, ProcessName and Path are read.
#   -Distros             : registered WSL distro names.
#   -AppDataPath         : tray state directory under the AppData root.
#   -LocalAppDataPath    : tray state directory under the LocalAppData root.
#   -InstallLocationPath : optional install directory (may be empty).
#   -PortOwners          : port 18789 snapshot taken before any change.
function Add-TargetSummary {
    param(
        [object[]]$Processes,
        [string[]]$Distros,
        [string]$AppDataPath,
        [string]$LocalAppDataPath,
        [string]$InstallLocationPath,
        [object[]]$PortOwners
    )

    $processInfo = @(
        $Processes | ForEach-Object {
            [ordered]@{
                pid  = $_.Id
                name = $_.ProcessName
                path = $_.Path
            }
        }
    )

    $distroPresent = $Distros -contains $script:OpenClawDistroName
    $appDataPresent = Test-Path -LiteralPath $AppDataPath
    $localAppDataPresent = Test-Path -LiteralPath $LocalAppDataPath
    # An empty install location counts as "does not exist" and is never probed.
    $installPresent = (-not [string]::IsNullOrWhiteSpace($InstallLocationPath)) -and (Test-Path -LiteralPath $InstallLocationPath)

    $script:result.targets = [ordered]@{
        processes                       = $processInfo
        distroExists                    = $distroPresent
        distroName                      = $script:OpenClawDistroName
        appDataPath                     = $AppDataPath
        appDataExists                   = $appDataPresent
        localAppDataPath                = $LocalAppDataPath
        localAppDataExists              = $localAppDataPresent
        installLocationPath             = $InstallLocationPath
        installLocationExists           = $installPresent
        installLocationCleanupRequested = [bool]$CleanInstallLocation
        port18789OwnersBefore           = @($PortOwners)
        outputDir                       = $OutputDir
        backupRoot                      = $BackupRoot
    }

    $headline = @{
        processCount          = @($Processes).Count
        distroExists          = [bool]$distroPresent
        appDataExists         = [bool]$appDataPresent
        localAppDataExists    = [bool]$localAppDataPresent
        installLocationExists = [bool]$installPresent
    }
    Add-ResetStep "target-summary" "Completed" "Captured OpenClaw-owned reset targets." $headline
}
|
||||
|
||||
# Verifies that the reset actually removed everything it was meant to remove.
# Skipped (recorded as such) in dry-run mode. Throws on the first violated
# condition; on success it captures the post-reset WSL list and port snapshot
# and records a "Passed" step.
function Assert-CleanPostCondition {
    param(
        [string]$AppDataPath,
        [string]$LocalAppDataPath,
        [string]$InstallLocationPath
    )

    if ($result.dryRun) {
        Add-ResetStep "postconditions" "Skipped" "Postconditions are skipped during dry-run."
        return
    }

    # 1. No OpenClaw processes left, unless the caller asked to keep them running.
    $stillRunning = @(Get-OpenClawProcesses)
    if ((-not $KeepRunningProcesses) -and ($stillRunning.Count -gt 0)) {
        throw "OpenClaw processes are still running after reset: $(@($stillRunning | ForEach-Object { $_.Id }) -join ', ')"
    }

    # 2. The gateway distro must no longer be registered.
    $stillRegistered = @(Get-WslDistros)
    if ($stillRegistered -contains $script:OpenClawDistroName) {
        throw "WSL distro '$($script:OpenClawDistroName)' is still registered after reset."
    }

    # 3. Both tray state directories must be gone.
    if (Test-Path -LiteralPath $AppDataPath) {
        throw "AppData path still exists after reset: $AppDataPath"
    }
    if (Test-Path -LiteralPath $LocalAppDataPath) {
        throw "LocalAppData path still exists after reset: $LocalAppDataPath"
    }

    # 4. The install location is only checked when its cleanup was requested.
    $installSupplied = -not [string]::IsNullOrWhiteSpace($InstallLocationPath)
    if ($CleanInstallLocation -and $installSupplied -and (Test-Path -LiteralPath $InstallLocationPath)) {
        throw "Install location still exists after reset: $InstallLocationPath"
    }

    # Capture evidence of the post-reset state.
    $wslListAfterPath = Join-Path $OutputDir "wsl-list-after.txt"
    & wsl.exe --list --verbose > $wslListAfterPath 2>&1
    $script:result.targets.port18789OwnersAfter = @(Get-PortOwnerSnapshot -Label "after")

    Add-ResetStep "postconditions" "Passed" "OpenClaw-owned state reset postconditions passed." @{
        wslListAfter = $wslListAfterPath
    }
}
|
||||
|
||||
# The output directory is created even in dry-run mode; reports are written there.
New-Item -ItemType Directory -Force -Path $OutputDir | Out-Null

try {
    Assert-DestructiveTargetIsAllowed

    # Fall back to the current user's profile roots when none were supplied.
    if ([string]::IsNullOrWhiteSpace($AppDataRoot)) {
        $AppDataRoot = $env:APPDATA
        $result.appDataRoot = $AppDataRoot
    }
    if ([string]::IsNullOrWhiteSpace($LocalAppDataRoot)) {
        $LocalAppDataRoot = $env:LOCALAPPDATA
        $result.localAppDataRoot = $LocalAppDataRoot
    }

    # --- Discover targets ----------------------------------------------------
    $appData = Join-Path $AppDataRoot "OpenClawTray"
    $localAppData = Join-Path $LocalAppDataRoot "OpenClawTray"
    $processes = @(Get-OpenClawProcesses)
    $distros = @(Get-WslDistros)
    $portOwnersBefore = @(Get-PortOwnerSnapshot -Label "before")

    $targetArgs = @{
        Processes           = $processes
        Distros             = $distros
        AppDataPath         = $appData
        LocalAppDataPath    = $localAppData
        InstallLocationPath = $InstallLocation
        PortOwners          = $portOwnersBefore
    }
    Add-TargetSummary @targetArgs

    # --- Announce mode -------------------------------------------------------
    if ($result.dryRun) {
        Add-ResetStep "mode" "DryRun" "No state will be changed. Pass -ConfirmDestructiveClean to reset OpenClaw-owned state."
        Write-Host "DRY-RUN: pass -ConfirmDestructiveClean to actually reset OpenClaw-owned state."
    }
    else {
        Add-ResetStep "mode" "Confirmed" "OpenClaw-owned state reset is enabled for this run."
        Write-Host "Backups will be written under: $BackupRoot"
    }

    # --- Stop OpenClaw processes (by PID only) -------------------------------
    $processIds = @($processes | ForEach-Object { $_.Id })
    if ($processes.Count -eq 0) {
        Add-ResetStep "stop-openclaw-processes" "Skipped" "No OpenClaw processes are running."
    }
    elseif ($KeepRunningProcesses) {
        Add-ResetStep "stop-openclaw-processes" "Skipped" "Keeping running OpenClaw processes because -KeepRunningProcesses was set." @{
            pids = $processIds
        }
    }
    elseif ($result.dryRun) {
        Add-ResetStep "stop-openclaw-processes" "DryRun" "Would stop running OpenClaw processes by PID." @{
            pids = $processIds
        }
    }
    else {
        foreach ($process in $processes) {
            Stop-Process -Id $process.Id -Force
        }
        Add-ResetStep "stop-openclaw-processes" "Completed" "Stopped running OpenClaw processes by PID." @{
            pids = $processIds
        }
    }

    # --- WSL distro ----------------------------------------------------------
    $hasGatewayDistro = $distros -contains $script:OpenClawDistroName
    $wslListPath = Join-Path $OutputDir "wsl-list-before.txt"
    & wsl.exe --list --verbose > $wslListPath 2>&1
    Add-ResetStep "capture-wsl-list" "Completed" "Captured WSL distro list." @{ path = $wslListPath }

    $unregisterStep = "unregister-$($script:OpenClawDistroName)"
    if (-not $hasGatewayDistro) {
        Add-ResetStep $unregisterStep "Skipped" "WSL distro '$($script:OpenClawDistroName)' is not registered."
    }
    elseif ($result.dryRun) {
        Add-ResetStep $unregisterStep "DryRun" "Would terminate and unregister only the '$($script:OpenClawDistroName)' WSL distro." @{ distroName = $script:OpenClawDistroName }
    }
    else {
        # Exact-target only: --terminate <name>, never --shutdown.
        Invoke-CapturedCommand "wsl-terminate-$($script:OpenClawDistroName)" "wsl.exe" @("--terminate", $script:OpenClawDistroName) -IgnoreExitCode
        Invoke-CapturedCommand "wsl-unregister-$($script:OpenClawDistroName)" "wsl.exe" @("--unregister", $script:OpenClawDistroName)
    }

    # --- Back up and remove state directories --------------------------------
    Backup-Directory -Path $appData -Label "appdata"
    Backup-Directory -Path $localAppData -Label "localappdata"

    if (-not $CleanInstallLocation) {
        Add-ResetStep "backup-install-location" "Skipped" "Install location cleanup was not requested."
    }
    elseif ([string]::IsNullOrWhiteSpace($InstallLocation)) {
        Add-ResetStep "backup-install-location" "Skipped" "No install location was supplied."
    }
    else {
        Backup-Directory -Path $InstallLocation -Label "install-location"
    }

    Assert-CleanPostCondition -AppDataPath $appData -LocalAppDataPath $localAppData -InstallLocationPath $InstallLocation

    # --- Write the report ----------------------------------------------------
    $result.finishedAt = (Get-Date).ToString("o")
    $summaryPath = Join-Path $OutputDir "reset-summary.json"
    $result | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $summaryPath -Encoding UTF8

    if ($PassThruJson) {
        # Emit the same JSON on the output stream for callers that parse it.
        $result | ConvertTo-Json -Depth 10
    }
    else {
        Write-Host "Reset summary: $summaryPath"
        if (-not $result.dryRun) {
            Write-Host "Backup root: $BackupRoot"
        }
    }
}
catch {
    # Record the failure in the report, persist it, then fail the script.
    $result.finishedAt = (Get-Date).ToString("o")
    Add-ResetStep "reset" "Failed" $_.Exception.Message
    $summaryPath = Join-Path $OutputDir "reset-summary.json"
    $result | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $summaryPath -Encoding UTF8
    Write-Error $_.Exception.Message
    exit 1
}
|
||||
@ -1,941 +0,0 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Validate the OpenClaw WSL gateway local-setup product code path end-to-end.
|
||||
|
||||
.DESCRIPTION
|
||||
Phase 6 clean port. Drives the WinUI3 tray app from launch through the
|
||||
forked onboarding (SetupWarningPage -> "Set up locally" -> LocalSetupProgressPage)
|
||||
so the *product* code path that runs
|
||||
|
||||
wsl --install Ubuntu-24.04 --name OpenClawGateway --location <path> --no-launch --version 2
|
||||
|
||||
is exercised end-to-end. The script does NOT install WSL itself and does NOT
|
||||
invoke `wsl --install` directly: it expects the tray engine to do that and
|
||||
only verifies the postcondition.
|
||||
|
||||
Networking diagnostics are loopback-only. There is no WSL-IP / lan / auto
|
||||
fallback. Token / setup-code / private-key material is redacted in artifacts.
|
||||
|
||||
.PARAMETER Scenario
|
||||
PreflightOnly - Repo layout + WSL host status + relay probe (safe; no install).
|
||||
UpstreamInstall - Build/test, drive tray onboarding to install OpenClawGateway,
|
||||
run smoke + pairing proofs. Reuses an existing distro if present.
|
||||
FreshMachine - Like UpstreamInstall, but unregisters any existing
|
||||
OpenClawGateway distro first (simulates a clean machine).
|
||||
Recreate - Iterated FreshMachine (unregister between runs). Use `-Iterations`.
|
||||
|
||||
.NOTES
|
||||
Diagnostics on networking/lifecycle health failures point operators at
|
||||
https://aka.ms/wsllogs (per Craig).
|
||||
|
||||
File I/O against WSL is via `wsl bash -c` only. NEVER \\wsl$ / \\wsl.localhost.
|
||||
#>
|
||||
[CmdletBinding()]
param(
    # Validation flow to run; see .PARAMETER Scenario in the help block.
    [ValidateSet("PreflightOnly", "UpstreamInstall", "FreshMachine", "Recreate")]
    [string]$Scenario = "PreflightOnly",

    # Parent directory for artifacts; each run gets a timestamped child folder.
    [string]$OutputDir = (Join-Path (Get-Location) "artifacts\wsl-gateway-validation"),

    # Number of runs; the help block pairs this with the Recreate scenario.
    [int]$Iterations = 1,

    # Must be set for FreshMachine / Recreate, which unregister the distro.
    [switch]$ConfirmDestructiveClean,

    # NOTE(review): the four cleanup/failure switches below are consumed outside
    # this part of the script; semantics presumed from their names - confirm.
    [switch]$KeepFailedDistro,
    [bool]$CleanupAfterSuccess = $true,
    [switch]$ContinueOnCleanupFailure,

    # Skip build.ps1 and the dotnet test runs.
    [switch]$NoBuild,

    # How long to wait for the tray setup state to reach Complete.
    [int]$TimeoutSeconds = 600,

    # WSL distro the tray is asked to create; destructive scenarios require the
    # name to start with 'OpenClawGateway'.
    [string]$DistroName = "OpenClawGateway",

    # Gateway URL; replaced by the GatewayUrl from setup state when it reports one.
    [string]$GatewayUrl = "ws://127.0.0.1:18789",

    # Optional relay probe endpoint (falls back to OPENCLAW_RELAY_PROBE_URI).
    [string]$RelayProbeUri,

    # Fail when no relay probe endpoint was supplied.
    [switch]$RequireRelayProbe,

    # Fail unless `openclaw qr --json` returns an upstream setupCode.
    [switch]$RequireRealGatewayBootstrap,

    # Run (and require) the stored operator token reconnect proof.
    [switch]$RequireOperatorPairing,

    # Run (and require) the Windows tray node pairing proof.
    [switch]$RequireWindowsNodePairing,

    [switch]$ContinueOnFailure
)
|
||||
|
||||
# Fail fast on undefined variables/properties and on any cmdlet error.
Set-StrictMode -Version Latest
$ErrorActionPreference = "Stop"

# Repository root is the parent of the directory holding this script.
$repoRoot = Resolve-Path (Join-Path $PSScriptRoot "..")

# Every run writes into its own timestamped folder below -OutputDir.
$runStamp = Get-Date -Format "yyyyMMdd-HHmmss"
$runRoot = Join-Path $OutputDir $runStamp
$commandsRoot = Join-Path $runRoot "commands"
$screenshotsRoot = Join-Path $runRoot "screenshots"
$summaryPath = Join-Path $runRoot "summary.json"
$summaryMarkdownPath = Join-Path $runRoot "summary.md"

# Project and build output locations; the runtime identifier follows the host CPU.
$trayProject = Join-Path $repoRoot "src\OpenClaw.Tray.WinUI\OpenClaw.Tray.WinUI.csproj"
$runtimeIdentifier = "win-x64"
if ($env:PROCESSOR_ARCHITECTURE -eq "ARM64") {
    $runtimeIdentifier = "win-arm64"
}
$trayExe = Join-Path $repoRoot "src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\$runtimeIdentifier\OpenClaw.Tray.WinUI.exe"
$cliProject = Join-Path $repoRoot "src\OpenClaw.Cli\OpenClaw.Cli.csproj"

# Always isolate AppData under run root for non-Preflight scenarios so we never
# trample the operator's real Windows tray identity.
if ($Scenario -eq "PreflightOnly") {
    $validationAppDataRoot = $env:APPDATA
    $validationLocalAppDataRoot = $env:LOCALAPPDATA
} else {
    $validationAppDataRoot = Join-Path $runRoot "isolated\appdata"
    $validationLocalAppDataRoot = Join-Path $runRoot "isolated\localappdata"
}
$setupStatePath = Join-Path $validationLocalAppDataRoot "OpenClawTray\setup-state.json"
$settingsPath = Join-Path $validationAppDataRoot "settings.json"
$wslInstallLocation = Join-Path $runRoot "wsl\$DistroName"
|
||||
|
||||
# Run-wide result document. Steps, phases and pairing facts are appended as the
# run progresses; Write-Summary serialises it to summary.json / summary.md.
$script:summary = [ordered]@{
    # Identity and overall state
    script                = "validate-wsl-gateway"
    scenario              = $Scenario
    startedAt             = (Get-Date).ToString("o")
    finishedAt            = $null
    status                = "Running"
    validationStatus      = "Running"
    cleanupStatus         = "NotStarted"

    # Environment of this run
    repository            = $repoRoot.Path
    outputDir             = $runRoot
    networkingMode        = "LocalhostOnly"
    activeDistroName      = $DistroName
    activeInstallLocation = $wslInstallLocation
    selectedGatewayUrl    = $GatewayUrl

    # Pairing proofs, filled in by the bootstrap / pairing probes
    pairingValidation     = [ordered]@{
        gatewayImplementation        = "Unknown"
        bootstrapQrShape             = "Unknown"
        realUpstreamBootstrapHandoff = $false
        operatorPaired               = $false
        windowsNodePaired            = $false
    }

    # Append-only logs
    setupPhases           = @()
    iterations            = @()
    steps                 = @()
    error                 = $null
}
|
||||
|
||||
function Add-Step {
    # Appends one timestamped step record to the run summary.
    #   Name    - step identifier
    #   Status  - outcome label (e.g. "Completed", "Passed", "Skipped", "Failed")
    #   Message - human readable description
    #   Data    - optional extra key/value details stored with the step
    param([string]$Name, [string]$Status, [string]$Message, [hashtable]$Data = @{})

    $record = [ordered]@{
        name      = $Name
        status    = $Status
        message   = $Message
        data      = $Data
        timestamp = (Get-Date).ToString("o")
    }
    $script:summary.steps += $record
}
|
||||
|
||||
function Test-IsOpenClawOwnedDistroName {
    # True when the distro name is "OpenClawGateway" (compared case-insensitively)
    # or starts with that prefix (compared ordinally, i.e. case-sensitively).
    param([string]$Name)

    $ownedPrefix = "OpenClawGateway"
    if ($Name -eq $ownedPrefix) {
        return $true
    }
    return $Name.StartsWith($ownedPrefix, [System.StringComparison]::Ordinal)
}
|
||||
|
||||
function Assert-DestructiveSafety {
    # Guards the scenarios that unregister a WSL distro: they need explicit
    # confirmation and may only target an OpenClaw-owned distro name.
    $isDestructive = $Scenario -in @("FreshMachine", "Recreate")
    if (-not $isDestructive) {
        return
    }

    if (-not $ConfirmDestructiveClean) {
        throw "-ConfirmDestructiveClean is required when -Scenario is $Scenario (will unregister WSL distro '$DistroName')."
    }

    if (-not (Test-IsOpenClawOwnedDistroName -Name $DistroName)) {
        throw "Refusing destructive action for non-OpenClaw distro '$DistroName'. Distro name must start with 'OpenClawGateway'."
    }
}
|
||||
|
||||
function Get-SafeUriDisplay {
    # Returns a form of $Uri that is safe to write to logs and summaries.
    # Query string, fragment and any user:password@ userinfo are removed because
    # each of them can carry credentials. Returns "<invalid-uri>" when the input
    # cannot be parsed.
    param([string]$Uri)

    try {
        $builder = [System.UriBuilder]::new($Uri)
        $builder.Query = $null
        $builder.Fragment = $null
        # Userinfo survives UriBuilder round-trips unless cleared explicitly.
        $builder.UserName = ""
        $builder.Password = ""
        return $builder.Uri.AbsoluteUri
    } catch {
        return "<invalid-uri>"
    }
}
|
||||
|
||||
function Write-Summary {
    # Stamps the finish time and writes the run summary twice: as JSON
    # (summary.json) and as a short Markdown report (summary.md).
    New-Item -ItemType Directory -Force -Path $runRoot | Out-Null
    $script:summary.finishedAt = (Get-Date).ToString("o")

    $json = $script:summary | ConvertTo-Json -Depth 20
    $json | Set-Content -LiteralPath $summaryPath -Encoding UTF8

    $header = @(
        "# OpenClaw WSL gateway validation",
        "",
        "- Scenario: $Scenario",
        "- Status: $($script:summary.status)",
        "- Validation: $($script:summary.validationStatus)",
        "- Cleanup: $($script:summary.cleanupStatus)",
        "- Networking mode: LocalhostOnly (loopback only)",
        "- Started: $($script:summary.startedAt)",
        "- Finished: $($script:summary.finishedAt)",
        "- Output: $runRoot",
        "",
        "## Steps"
    )

    # One bullet per recorded step, in the order they were added.
    $stepLines = @(foreach ($step in $script:summary.steps) {
        "- $($step.status): $($step.name) - $($step.message)"
    })
    $lines = $header + $stepLines

    if ($script:summary.error) {
        $lines += "", "## Error", $script:summary.error
        $lines += "", "Diagnostics: see https://aka.ms/wsllogs for WSL networking/lifecycle logs."
    }

    $lines | Set-Content -LiteralPath $summaryMarkdownPath -Encoding UTF8
}
|
||||
|
||||
function Redact-SensitiveGatewayOutput {
    # Replaces secret values in captured text with "<redacted>".
    # Two shapes are handled (matching is case-insensitive):
    #   1. JSON string properties whose key ends in "token" (bootstrapToken,
    #      device_token, GatewayToken, NodeToken, token, ...) or is one of
    #      setupCode / setup_code / PrivateKeyBase64 / PublicKeyBase64.
    #   2. Loose "bootstrap|device|gateway|auth token = value" / "...: value" pairs.
    # Null or empty input is returned unchanged.
    param([string]$Content)

    if ([string]::IsNullOrEmpty($Content)) { return $Content }

    # \w*token (instead of a fixed key list) also covers GatewayToken / NodeToken
    # style keys, which appear in tray settings and would otherwise leak.
    $jsonSecretPattern = '("(?:\w*token|setupCode|setup_code|PrivateKeyBase64|PublicKeyBase64)"\s*:\s*")[^"]+(")'
    $looseTokenPattern = '(?i)((?:bootstrap|device|gateway|auth)[_-]?token\s*[:=]\s*)[^\s,"''}]+'

    $redacted = $Content -replace $jsonSecretPattern, '$1<redacted>$2'
    $redacted = $redacted -replace $looseTokenPattern, '$1<redacted>'
    return $redacted
}
|
||||
|
||||
function Read-TextFileWithRetry {
    # Reads a whole file as one string, retrying on System.IO.IOException
    # (for example while another process still holds the file).
    #   Attempts          - maximum number of read attempts
    #   DelayMilliseconds - pause between attempts
    # The last IOException is rethrown once the attempts are used up; other
    # errors are not retried.
    param([string]$Path, [int]$Attempts = 10, [int]$DelayMilliseconds = 200)

    $attempt = 0
    while ($attempt -lt $Attempts) {
        $attempt++
        try {
            return Get-Content -LiteralPath $Path -Raw -ErrorAction Stop
        } catch [System.IO.IOException] {
            if ($attempt -eq $Attempts) {
                throw
            }
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
}
|
||||
|
||||
function Write-TextFileWithRetry {
    # Writes $Content to $Path as UTF-8, retrying on System.IO.IOException.
    #   Attempts          - maximum number of write attempts
    #   DelayMilliseconds - pause between attempts
    # The last IOException is rethrown once the attempts are used up.
    param([string]$Path, [string]$Content, [int]$Attempts = 10, [int]$DelayMilliseconds = 200)

    $attempt = 0
    while ($attempt -lt $Attempts) {
        $attempt++
        try {
            $Content | Set-Content -LiteralPath $Path -Encoding UTF8 -ErrorAction Stop
            return
        } catch [System.IO.IOException] {
            if ($attempt -eq $Attempts) {
                throw
            }
            Start-Sleep -Milliseconds $DelayMilliseconds
        }
    }
}
|
||||
|
||||
function Copy-RedactedFileIfExists {
    # Copies a text file to DestinationPath with secrets redacted.
    # Returns $true when a copy was written, $false when the source is absent.
    param([string]$SourcePath, [string]$DestinationPath)

    if (Test-Path -LiteralPath $SourcePath) {
        $original = Read-TextFileWithRetry -Path $SourcePath
        $sanitized = Redact-SensitiveGatewayOutput $original
        Write-TextFileWithRetry -Path $DestinationPath -Content $sanitized
        return $true
    }
    return $false
}
|
||||
|
||||
function Invoke-LoggedProcess {
    # Runs an external command, captures stdout/stderr into the run's commands
    # folder and records the outcome as a step.
    #   Name             - step name; also used (sanitised) for the capture file names
    #   FilePath         - command to invoke
    #   ArgumentList     - arguments, splatted to the command
    #   WorkingDirectory - directory to run in (defaults to the repository root)
    #   Environment      - process environment variables applied only for this call
    #   IgnoreExitCode   - do not throw on a non-zero exit code
    #   SensitiveOutput  - redact the captured files after the command finishes
    # Throws when the exit code is non-zero unless -IgnoreExitCode is set.
    param(
        [string]$Name,
        [string]$FilePath,
        [string[]]$ArgumentList,
        [string]$WorkingDirectory = $repoRoot.Path,
        [hashtable]$Environment = @{},
        [switch]$IgnoreExitCode,
        [switch]$SensitiveOutput
    )

    New-Item -ItemType Directory -Force -Path $commandsRoot | Out-Null
    $fileStem = $Name -replace "[^a-zA-Z0-9_.-]", "-"
    $stdout = Join-Path $commandsRoot "$fileStem.stdout.txt"
    $stderr = Join-Path $commandsRoot "$fileStem.stderr.txt"

    # Apply the requested environment, remembering what was there before.
    $previousValues = @{}
    foreach ($variable in $Environment.Keys) {
        $previousValues[$variable] = [Environment]::GetEnvironmentVariable($variable, "Process")
        [Environment]::SetEnvironmentVariable($variable, [string]$Environment[$variable], "Process")
    }

    Push-Location $WorkingDirectory
    try {
        & $FilePath @ArgumentList > $stdout 2> $stderr
        $exitCode = $global:LASTEXITCODE
        if ($null -eq $exitCode) {
            $exitCode = 0
        }
    } finally {
        # Always restore location and environment, even when the call throws.
        Pop-Location
        foreach ($variable in $Environment.Keys) {
            [Environment]::SetEnvironmentVariable($variable, $previousValues[$variable], "Process")
        }
    }

    if ($SensitiveOutput) {
        foreach ($capture in @($stdout, $stderr)) {
            if (-not (Test-Path -LiteralPath $capture)) {
                continue
            }
            $captured = Read-TextFileWithRetry -Path $capture -Attempts 20 -DelayMilliseconds 250
            Write-TextFileWithRetry -Path $capture -Content (Redact-SensitiveGatewayOutput $captured) -Attempts 20 -DelayMilliseconds 250
        }
    }

    $details = @{
        file      = $FilePath
        arguments = ($ArgumentList -join " ")
        exitCode  = $exitCode
        stdout    = $stdout
        stderr    = $stderr
    }
    Add-Step $Name "Completed" "Command completed with exit code $exitCode." $details

    $failed = $exitCode -ne 0
    if ($failed -and -not $IgnoreExitCode) {
        throw "$Name failed with exit code $exitCode. See $stdout and $stderr."
    }
}
|
||||
|
||||
function Invoke-LoggedPowerShellScript {
    # Runs a PowerShell script file in a child host through Invoke-LoggedProcess.
    #   Name         - step name for the captured run
    #   ScriptPath   - script passed to the host via -File
    #   ArgumentList - extra arguments appended after the script path
    # Uses pwsh.exe from $PSHOME when present, otherwise powershell.exe.
    param([string]$Name, [string]$ScriptPath, [string[]]$ArgumentList = @())

    $hostExe = "powershell.exe"
    if ($PSHOME) {
        $pwshCandidate = Join-Path $PSHOME "pwsh.exe"
        if (Test-Path $pwshCandidate) {
            $hostExe = $pwshCandidate
        }
    }

    # Named $hostArguments rather than $args: $args is an automatic variable and
    # should not be assigned to.
    $hostArguments = @("-NoProfile", "-ExecutionPolicy", "Bypass", "-File", $ScriptPath) + $ArgumentList
    Invoke-LoggedProcess -Name $Name -FilePath $hostExe -ArgumentList $hostArguments
}
|
||||
|
||||
function Invoke-RepositoryValidation {
    # Builds the repository and runs the Shared and Tray test projects,
    # unless -NoBuild was given (then only a "Skipped" step is recorded).
    if (-not $NoBuild) {
        $buildScript = Join-Path $repoRoot "build.ps1"
        Invoke-LoggedPowerShellScript "build" $buildScript

        $sharedTests = @("test", ".\tests\OpenClaw.Shared.Tests\OpenClaw.Shared.Tests.csproj", "--no-restore")
        Invoke-LoggedProcess "test-shared" "dotnet" $sharedTests

        $trayTests = @("test", ".\tests\OpenClaw.Tray.Tests\OpenClaw.Tray.Tests.csproj", "--no-restore")
        Invoke-LoggedProcess "test-tray" "dotnet" $trayTests
        return
    }

    Add-Step "repository-validation" "Skipped" "Skipped build and tests because -NoBuild was set."
}
|
||||
|
||||
function Invoke-Preflight {
    # Captures host diagnostics (dotnet / WSL status), verifies the required
    # projects exist in the repository, then runs the relay probe.
    $hostProbes = @(
        @{ Name = "dotnet-info";     File = "dotnet";  Arguments = @("--info") },
        @{ Name = "wsl-status";      File = "wsl.exe"; Arguments = @("--status") },
        @{ Name = "wsl-list-before"; File = "wsl.exe"; Arguments = @("--list", "--verbose") }
    )
    foreach ($probe in $hostProbes) {
        # Exit codes are informational only here.
        Invoke-LoggedProcess $probe.Name $probe.File $probe.Arguments -IgnoreExitCode
    }

    $trayPresent = Test-Path -LiteralPath $trayProject
    if (-not $trayPresent) {
        throw "Tray project not found: $trayProject"
    }
    $cliPresent = Test-Path -LiteralPath $cliProject
    if (-not $cliPresent) {
        throw "CLI project not found: $cliProject"
    }
    Add-Step "repo-layout" "Passed" "Required projects are present."

    Invoke-RelayPrototypeProbe
}
|
||||
|
||||
function Invoke-RelayPrototypeProbe {
    # Issues a single web request against the relay probe endpoint and stores
    # the (token-redacted) response body in the commands folder.
    # The endpoint comes from -RelayProbeUri, else OPENCLAW_RELAY_PROBE_URI.
    # Without an endpoint the step is "NotAvailable", or an error when
    # -RequireRelayProbe is set. A failed request always throws.
    $probeUri = $RelayProbeUri
    if ([string]::IsNullOrWhiteSpace($probeUri)) {
        $probeUri = [Environment]::GetEnvironmentVariable("OPENCLAW_RELAY_PROBE_URI", "Process")
    }

    if ([string]::IsNullOrWhiteSpace($probeUri)) {
        $msg = "No relay probe endpoint was supplied. Set -RelayProbeUri or OPENCLAW_RELAY_PROBE_URI."
        if ($RequireRelayProbe) {
            throw "RelayProbeMissing: $msg"
        }
        Add-Step "relay-prototype-probe" "NotAvailable" $msg
        return
    }

    $relayPath = Join-Path $commandsRoot "relay-prototype-probe.txt"
    New-Item -ItemType Directory -Force -Path $commandsRoot | Out-Null

    try {
        $response = Invoke-WebRequest -Uri $probeUri -TimeoutSec 15 -UseBasicParsing

        $body = $response.Content
        if ($null -eq $body) {
            $body = ""
        }
        # Scrub token query values before the body is persisted.
        $body = $body -replace '(?i)(token=)[^&\s]+', '$1<redacted>'
        $body | Set-Content -LiteralPath $relayPath -Encoding UTF8

        $details = @{
            uri        = (Get-SafeUriDisplay $probeUri)
            statusCode = [int]$response.StatusCode
            path       = $relayPath
        }
        Add-Step "relay-prototype-probe" "Passed" "Relay probe endpoint responded." $details
    } catch {
        throw "RelayProbeFailed: relay probe failed for $(Get-SafeUriDisplay $probeUri): $($_.Exception.Message)"
    }
}
|
||||
|
||||
function Get-LatestScreenshotPath {
    # Full path of the most recently written *.png below the screenshots folder
    # (searched recursively), or $null when the folder or any screenshot is missing.
    if (Test-Path -LiteralPath $screenshotsRoot) {
        $newest = Get-ChildItem -LiteralPath $screenshotsRoot -Filter "*.png" -File -Recurse |
            Sort-Object -Property LastWriteTime -Descending |
            Select-Object -First 1
        if ($null -ne $newest) {
            return $newest.FullName
        }
    }
    return $null
}
|
||||
|
||||
function Save-DiagnosticsSnapshot {
    # Copies redacted versions of the tray state files that exist (setup state,
    # settings, device key) into <run>\diagnostics and records a step for $Reason.
    param([string]$Reason)

    $diag = Join-Path $runRoot "diagnostics"
    New-Item -ItemType Directory -Force -Path $diag | Out-Null

    $artifacts = @(
        @{ Source = $setupStatePath; Target = "setup-state.redacted.json" },
        @{ Source = $settingsPath; Target = "settings.redacted.json" },
        @{ Source = (Join-Path $validationAppDataRoot "OpenClawTray\device-key-ed25519.json"); Target = "device-key.shape.redacted.json" }
    )
    foreach ($artifact in $artifacts) {
        if (Test-Path -LiteralPath $artifact.Source) {
            $destination = Join-Path $diag $artifact.Target
            Copy-RedactedFileIfExists -SourcePath $artifact.Source -DestinationPath $destination | Out-Null
        }
    }

    $details = @{
        path             = $diag
        latestScreenshot = (Get-LatestScreenshotPath)
        wslLogsHelp      = "https://aka.ms/wsllogs"
    }
    Add-Step "diagnostics-snapshot" "Completed" "Saved diagnostics snapshot for $Reason. See https://aka.ms/wsllogs for WSL networking/lifecycle logs." $details
}
|
||||
|
||||
function Get-ValidationAppEnvironment {
    # Environment overrides that point child processes at this run's AppData /
    # LocalAppData roots instead of the defaults.
    $overrides = @{}
    $overrides["OPENCLAW_TRAY_DATA_DIR"] = $validationAppDataRoot
    $overrides["OPENCLAW_TRAY_APPDATA_DIR"] = $validationAppDataRoot
    $overrides["OPENCLAW_TRAY_LOCALAPPDATA_DIR"] = $validationLocalAppDataRoot
    return $overrides
}
|
||||
|
||||
function Convert-SetupStatus {
    # Maps a numeric setup status to its name; anything that is not a known
    # numeric value (already a name, out of range, too large for Int32) is
    # returned as its string form unchanged.
    param([object]$Status)

    $text = [string]$Status

    # Aligned with LocalGatewaySetupStatus enum
    $names = @("Pending", "Running", "RequiresAdmin", "RequiresRestart", "Blocked",
               "FailedRetryable", "FailedTerminal", "Complete", "Cancelled")

    $index = 0
    # TryParse instead of an [int] cast: a long digit string would overflow the
    # cast and throw, whereas an unknown value should simply pass through.
    if ($text -match '^\d+$' -and [int]::TryParse($text, [ref]$index)) {
        if ($index -ge 0 -and $index -lt $names.Count) {
            return $names[$index]
        }
    }
    return $text
}
|
||||
|
||||
function Convert-SetupPhase {
    # Maps a numeric setup phase to its name; anything that is not a known
    # numeric value (already a name, out of range, too large for Int32) is
    # returned as its string form unchanged.
    param([object]$Phase)

    $text = [string]$Phase

    # Aligned with the clean LocalGatewaySetupPhase enum (worker / rootfs phases removed).
    $names = @(
        "NotStarted", "Preflight", "ElevationCheck",
        "EnsureWslEnabled", "CreateWslInstance", "ConfigureWslInstance",
        "InstallOpenClawCli", "PrepareGatewayConfig", "InstallGatewayService",
        "StartGateway", "WaitForGateway",
        "MintBootstrapToken", "PairOperator",
        "CheckWindowsNodeReadiness", "PairWindowsTrayNode",
        "VerifyEndToEnd", "Complete", "Failed", "Cancelled"
    )

    $index = 0
    # TryParse instead of an [int] cast: a long digit string would overflow the
    # cast and throw, whereas an unknown value should simply pass through.
    if ($text -match '^\d+$' -and [int]::TryParse($text, [ref]$index)) {
        if ($index -ge 0 -and $index -lt $names.Count) {
            return $names[$index]
        }
    }
    return $text
}
|
||||
|
||||
function Wait-ForUiAutomationElement {
    # Polls the UI Automation tree (all descendants of the desktop root) every
    # 500 ms for an element with the given AutomationId.
    # Returns the element, or $null when the timeout elapses first.
    param([string]$AutomationId, [int]$TimeoutSeconds)

    Add-Type -AssemblyName UIAutomationClient
    Add-Type -AssemblyName UIAutomationTypes

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    $idProperty = [System.Windows.Automation.AutomationElement]::AutomationIdProperty
    $matchById = New-Object System.Windows.Automation.PropertyCondition($idProperty, $AutomationId)
    $searchScope = [System.Windows.Automation.TreeScope]::Descendants

    while ($true) {
        if ((Get-Date) -ge $deadline) {
            break
        }
        $found = [System.Windows.Automation.AutomationElement]::RootElement.FindFirst($searchScope, $matchById)
        if ($null -ne $found) {
            return $found
        }
        Start-Sleep -Milliseconds 500
    }
    return $null
}
|
||||
|
||||
function Invoke-UiAutomationClick {
    # Waits for the element with the given AutomationId and triggers it through
    # its InvokePattern. When the element never appears, a diagnostics snapshot
    # is saved and an error is thrown.
    param([string]$AutomationId, [int]$TimeoutSeconds)

    $target = Wait-ForUiAutomationElement -AutomationId $AutomationId -TimeoutSeconds $TimeoutSeconds
    if ($null -eq $target) {
        Save-DiagnosticsSnapshot -Reason "missing-ui-target-$AutomationId"
        throw "UI element with AutomationId '$AutomationId' was not found within $TimeoutSeconds seconds."
    }

    $invoker = $target.GetCurrentPattern([System.Windows.Automation.InvokePattern]::Pattern)
    $invoker.Invoke()
    Add-Step "ui-click-$AutomationId" "Completed" "Clicked UI element with AutomationId '$AutomationId'."
}
|
||||
|
||||
function Stop-ExistingTrayProcesses {
    # Force-stops every OpenClaw.Tray.WinUI process whose executable lives inside
    # this repository and records one step per process.
    #   Reason - free-text label stored with each step (e.g. "pre-launch")
    # Processes that exit before they can be stopped are recorded as "Skipped".
    param([string]$Reason)

    # Compare against "<repo>\" rather than "<repo>" so a sibling directory that
    # merely shares the prefix (e.g. "<repo>-other") is never treated as ours.
    $repoPrefix = ([string]$repoRoot.Path).TrimEnd('\', '/') + [System.IO.Path]::DirectorySeparatorChar

    $procs = Get-Process -Name "OpenClaw.Tray.WinUI" -ErrorAction SilentlyContinue |
        Where-Object {
            # Reading Path can fail for processes we are not allowed to inspect.
            try { -not [string]::IsNullOrWhiteSpace($_.Path) -and $_.Path.StartsWith($repoPrefix, [System.StringComparison]::OrdinalIgnoreCase) }
            catch { $false }
        }

    foreach ($p in $procs) {
        $procId = $p.Id
        try {
            Stop-Process -Id $procId -Force -ErrorAction Stop
            Add-Step "stop-existing-tray" "Completed" "Stopped existing repo tray process by PID before validation." @{ pid = $procId; reason = $Reason }
        } catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
            Add-Step "stop-existing-tray" "Skipped" "Repo tray process had already exited before cleanup." @{ pid = $procId; reason = $Reason }
        }
    }
}
|
||||
|
||||
function Stop-WslKeepAliveProcesses {
    # Force-stops wsl.exe processes whose command line mentions the target
    # distro together with "sleep" and "2147483647" (the keepalive signature
    # this script looks for) and records one step per process.
    $target = $DistroName
    $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase

    $procs = Get-CimInstance Win32_Process -Filter "Name = 'wsl.exe'" -ErrorAction SilentlyContinue |
        Where-Object {
            $commandLine = $_.CommandLine
            # IndexOf(...) -ge 0 instead of Contains(string, StringComparison):
            # that Contains overload is missing on .NET Framework, so it fails
            # under Windows PowerShell.
            $commandLine -and
            $commandLine.IndexOf($target, $ignoreCase) -ge 0 -and
            $commandLine.IndexOf("sleep", $ignoreCase) -ge 0 -and
            $commandLine.IndexOf("2147483647", $ignoreCase) -ge 0
        }

    foreach ($p in $procs) {
        try {
            Stop-Process -Id $p.ProcessId -Force -ErrorAction Stop
            Add-Step "stop-wsl-keepalive" "Completed" "Stopped $target keepalive process by PID." @{ pid = $p.ProcessId; distroName = $target }
        } catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
            Add-Step "stop-wsl-keepalive" "Skipped" "$target keepalive process had already exited." @{ pid = $p.ProcessId; distroName = $target }
        }
    }
}
|
||||
|
||||
function Start-TrayForLocalSetup {
    # Stops repo tray processes that are still running, then launches the built
    # tray executable with the validation environment applied.
    # Returns the started process object. The environment of this PowerShell
    # process is restored before returning, also when the launch fails.
    Stop-ExistingTrayProcesses -Reason "pre-launch"

    # Only the UpstreamInstall scenario lets the tray reuse an existing distro.
    $allowExistingDistro = "0"
    if ($Scenario -eq "UpstreamInstall") {
        $allowExistingDistro = "1"
    }

    # Forked onboarding entry point is SetupWarning by default; we just force
    # onboarding mode and let the script click "Set up locally".
    $launchEnvironment = @{
        OPENCLAW_SKIP_UPDATE_CHECK         = "1"
        OPENCLAW_FORCE_ONBOARDING          = "1"
        OPENCLAW_WSL_DISTRO_NAME           = $DistroName
        OPENCLAW_WSL_INSTALL_LOCATION      = $wslInstallLocation
        OPENCLAW_WSL_ALLOW_EXISTING_DISTRO = $allowExistingDistro
        OPENCLAW_TRAY_DATA_DIR             = $validationAppDataRoot
        OPENCLAW_TRAY_APPDATA_DIR          = $validationAppDataRoot
        OPENCLAW_TRAY_LOCALAPPDATA_DIR     = $validationLocalAppDataRoot
        OPENCLAW_VISUAL_TEST               = "1"
        OPENCLAW_VISUAL_TEST_DIR           = $screenshotsRoot
    }

    # The child inherits this process's environment, so set it here and put the
    # previous values back afterwards.
    $previousValues = @{}
    foreach ($variable in $launchEnvironment.Keys) {
        $previousValues[$variable] = [Environment]::GetEnvironmentVariable($variable, "Process")
        [Environment]::SetEnvironmentVariable($variable, [string]$launchEnvironment[$variable], "Process")
    }

    try {
        New-Item -ItemType Directory -Force -Path $screenshotsRoot | Out-Null

        $exePresent = Test-Path -LiteralPath $trayExe
        if (-not $exePresent) {
            throw "Built tray executable not found at $trayExe. Run build.ps1 first or omit -NoBuild."
        }

        $trayProcess = Start-Process -FilePath $trayExe -WorkingDirectory $repoRoot -PassThru
        $details = @{
            pid               = $trayProcess.Id
            screenshots       = $screenshotsRoot
            file              = $trayExe
            runtimeIdentifier = $runtimeIdentifier
        }
        Add-Step "launch-tray" "Completed" "Launched tray onboarding for WSL local setup." $details
        return $trayProcess
    } finally {
        foreach ($variable in $launchEnvironment.Keys) {
            [Environment]::SetEnvironmentVariable($variable, $previousValues[$variable], "Process")
        }
    }
}
|
||||
|
||||
function Wait-ForSetupCompletion {
    # Polls the tray's setup-state.json every 2 seconds until setup reports
    # Complete, a failure status, or the timeout elapses.
    #   TimeoutSeconds - maximum time to wait
    # Each phase/status change is recorded in the summary. On Complete, a
    # GatewayUrl reported by the state replaces the script-level gateway URL.
    # Throws (after saving diagnostics) on FailedRetryable / FailedTerminal /
    # Blocked / Cancelled and on timeout.
    param([int]$TimeoutSeconds)

    $deadline = (Get-Date).AddSeconds($TimeoutSeconds)
    $lastPhase = ""; $lastStatus = ""

    while ((Get-Date) -lt $deadline) {
        if (Test-Path -LiteralPath $setupStatePath) {
            $text = Read-TextFileWithRetry -Path $setupStatePath
            $state = $text | ConvertFrom-Json

            # Keep a copy of the latest state with the run artifacts. The copy
            # is redacted: artifacts must not carry token / setup-code / key
            # material, and the diagnostics snapshot redacts this same file.
            $copy = Join-Path $runRoot "setup-state.json"
            (Redact-SensitiveGatewayOutput $text) | Set-Content -LiteralPath $copy -Encoding UTF8

            $phase = Convert-SetupPhase $state.Phase
            $status = Convert-SetupStatus $state.Status
            if ($phase -ne $lastPhase -or $status -ne $lastStatus) {
                $lastPhase = $phase; $lastStatus = $status
                $script:summary.setupPhases += [ordered]@{
                    phase = $phase; status = $status; message = [string]$state.UserMessage; timestamp = (Get-Date).ToString("o")
                }
                Add-Step "setup-phase-$phase" $status ([string]$state.UserMessage) @{ phase = $phase; status = $status }
            }

            if ($status -eq "Complete") {
                if ($state.PSObject.Properties.Name -contains "GatewayUrl" -and -not [string]::IsNullOrWhiteSpace([string]$state.GatewayUrl)) {
                    $script:GatewayUrl = [string]$state.GatewayUrl
                    $script:summary.selectedGatewayUrl = $script:GatewayUrl
                }
                Add-Step "setup-state" "Passed" "Setup reached $status." @{
                    status = $status; phase = $phase; path = $copy
                    gatewayUrl = (Get-SafeUriDisplay $script:GatewayUrl)
                }
                return
            }

            if ($status -in @("FailedRetryable", "FailedTerminal", "Blocked", "Cancelled")) {
                Save-DiagnosticsSnapshot -Reason "setup-failed-$phase"
                throw "Setup failed with status $status, phase $phase, code $($state.FailureCode): $($state.UserMessage). Diagnostics: https://aka.ms/wsllogs."
            }
        }
        Start-Sleep -Seconds 2
    }

    Save-DiagnosticsSnapshot -Reason "setup-timeout"
    throw "Setup did not reach Complete within $TimeoutSeconds seconds. Diagnostics: https://aka.ms/wsllogs."
}
|
||||
|
||||
function Invoke-TrayLocalSetup {
    # Launches the tray, clicks "Set up locally" on the onboarding page and waits
    # for the setup state to reach Complete. Returns the tray process object.
    $trayProcess = Start-TrayForLocalSetup
    Start-Sleep -Seconds 5

    # SetupWarningPage hosts the "Set up locally" primary button.
    $setupLocalButtonId = "OnboardingSetupLocal"
    $button = Wait-ForUiAutomationElement -AutomationId $setupLocalButtonId -TimeoutSeconds 60
    if ($null -eq $button) {
        Save-DiagnosticsSnapshot -Reason "setup-local-button-not-found"
        throw "UI automation target OnboardingSetupLocal was not found on SetupWarningPage."
    }
    Invoke-UiAutomationClick -AutomationId $setupLocalButtonId -TimeoutSeconds 5

    # LocalSetupProgressPage starts the engine on appearance; just wait for state.
    Wait-ForSetupCompletion -TimeoutSeconds $TimeoutSeconds
    return $trayProcess
}
|
||||
|
||||
function Stop-TrayProcess {
    # Stops the tray process started for this run (when one is given), then
    # sweeps remaining repo tray processes and WSL keepalive processes.
    #   Process - process object of the tray launch; may be $null
    param([object]$Process)

    if ($null -ne $Process) {
        $procId = $Process.Id
        $alreadyExited = $null -eq (Get-Process -Id $procId -ErrorAction SilentlyContinue)

        if (-not $alreadyExited) {
            try {
                Stop-Process -Id $procId -Force -ErrorAction Stop
                Add-Step "stop-tray" "Completed" "Stopped tray process by PID after setup validation." @{ pid = $procId }
            } catch [Microsoft.PowerShell.Commands.ProcessCommandException] {
                # The process exited between the liveness check and the stop
                # call; that is the desired end state, not a failure.
                $alreadyExited = $true
            }
        }

        if ($alreadyExited) {
            Add-Step "stop-tray" "Skipped" "Tray process had already exited before cleanup." @{ pid = $procId }
        }
    }

    Stop-ExistingTrayProcesses -Reason "post-validation"
    Stop-WslKeepAliveProcesses
}
|
||||
|
||||
function Convert-GatewayUrlToHealthUri {
    # Derives the gateway health endpoint from a gateway URL: ws -> http,
    # wss -> https (other schemes are kept), and "/health" is appended to the
    # path after removing trailing slashes.
    param([string]$Url)

    $builder = [System.UriBuilder]::new($Url)

    $httpSchemeFor = @{ ws = "http"; wss = "https" }
    $scheme = $builder.Scheme
    if ($httpSchemeFor.ContainsKey($scheme)) {
        $builder.Scheme = $httpSchemeFor[$scheme]
    }

    $basePath = $builder.Path.TrimEnd("/")
    $builder.Path = $basePath + "/health"
    return $builder.Uri.AbsoluteUri
}
|
||||
|
||||
function Save-LoopbackNetworkDiagnostics {
    # Captures the Windows TCP connection entries for local port 18789 as JSON
    # in the commands folder. When the query fails, the error message is written
    # to the same file and the step is recorded as "Skipped".
    #   Reason - label used (sanitised) in the file and step names
    param([string]$Reason)

    # Loopback only - no WSL IP, no `hostname -I`, no lan probes.
    $label = $Reason -replace "[^a-zA-Z0-9_.-]", "-"
    $tcpPath = Join-Path $commandsRoot "network-$label-windows-tcp-18789.json"

    try {
        $connections = Get-NetTCPConnection -LocalPort 18789 -ErrorAction Stop
        $rows = @(foreach ($connection in $connections) {
            [ordered]@{
                localAddress  = $connection.LocalAddress
                localPort     = $connection.LocalPort
                state         = $connection.State.ToString()
                owningProcess = $connection.OwningProcess
            }
        })
        $rows | ConvertTo-Json -Depth 5 | Set-Content -LiteralPath $tcpPath -Encoding UTF8
        Add-Step "network-$label-windows-tcp" "Completed" "Captured Windows TCP listener state for loopback gateway port." @{ path = $tcpPath }
    } catch {
        $_.Exception.Message | Set-Content -LiteralPath $tcpPath -Encoding UTF8
        Add-Step "network-$label-windows-tcp" "Skipped" "Could not capture Windows TCP listener state. See https://aka.ms/wsllogs." @{ path = $tcpPath }
    }
}
|
||||
|
||||
function Save-RedactedSettings {
    # Saves a redacted copy of the tray settings file into the run folder and
    # records a step; records "Skipped" when the settings file does not exist.
    if (-not (Test-Path -LiteralPath $settingsPath)) {
        Add-Step "settings-redacted" "Skipped" "Tray settings file was not found."
        return
    }

    $copy = Join-Path $runRoot "settings.redacted.json"
    $c = Read-TextFileWithRetry -Path $settingsPath

    # Settings-specific keys first (this pattern also blanks empty values) ...
    $c = $c -replace '("(?:Token|token|GatewayToken|BootstrapToken|bootstrapToken|bootstrap_token|NodeToken|nodeToken)"\s*:\s*")[^"]*(")', '$1<redacted>$2'
    # ... then the shared redactor, so device tokens, setup codes and key
    # material get the same treatment as in every other artifact.
    $c = Redact-SensitiveGatewayOutput $c

    $c | Set-Content -LiteralPath $copy -Encoding UTF8
    Add-Step "settings-redacted" "Completed" "Saved redacted tray settings." @{ path = $copy }
}
|
||||
|
||||
function Test-SetupHistoryPhase {
    # True when the setup state shows that $Phase was reached: either a History
    # entry for that phase with status Running or Complete, or the state's
    # current Phase equals it. False when the state file or its History
    # property is missing.
    param([string]$Phase)

    $stateFilePresent = Test-Path -LiteralPath $setupStatePath
    if (-not $stateFilePresent) {
        return $false
    }

    $state = Read-TextFileWithRetry -Path $setupStatePath | ConvertFrom-Json
    if ($state.PSObject.Properties.Name -notcontains "History") {
        return $false
    }

    $acceptedStatuses = @("Running", "Complete")
    foreach ($entry in @($state.History)) {
        $entryPhase = Convert-SetupPhase $entry.Phase
        if ($entryPhase -ne $Phase) {
            continue
        }
        if ((Convert-SetupStatus $entry.Status) -in $acceptedStatuses) {
            return $true
        }
    }

    $currentPhase = Convert-SetupPhase $state.Phase
    return $currentPhase -eq $Phase
}
|
||||
|
||||
function Save-RedactedDeviceIdentityShape {
    # Saves a redacted copy of the device identity file and checks whether it
    # holds a non-blank DeviceToken or OperatorDeviceToken.
    # Returns $true when such a token is present; $false when the file is
    # missing, cannot be parsed, or has no token. A step is recorded each time.
    $identityFile = Join-Path $validationAppDataRoot "OpenClawTray\device-key-ed25519.json"
    if (-not (Test-Path -LiteralPath $identityFile)) {
        Add-Step "device-identity" "Failed" "Device identity file was not found." @{ path = $identityFile }
        return $false
    }

    $copy = Join-Path $runRoot "device-key.shape.redacted.json"
    Copy-RedactedFileIfExists -SourcePath $identityFile -DestinationPath $copy | Out-Null

    try {
        $identity = Get-Content -LiteralPath $identityFile -Raw | ConvertFrom-Json
        $propertyNames = $identity.PSObject.Properties.Name

        $hasOperatorToken = $false
        foreach ($tokenProperty in @("DeviceToken", "OperatorDeviceToken")) {
            if ($propertyNames -contains $tokenProperty -and -not [string]::IsNullOrWhiteSpace([string]$identity.$tokenProperty)) {
                $hasOperatorToken = $true
                break
            }
        }

        $outcome = "Failed"
        if ($hasOperatorToken) {
            $outcome = "Passed"
        }
        Add-Step "device-identity" $outcome "Checked stored device identity token shape." @{
            path = $copy; hasOperatorToken = $hasOperatorToken
        }
        return $hasOperatorToken
    } catch {
        Add-Step "device-identity" "Failed" "Device identity JSON could not be parsed." @{ path = $copy }
        return $false
    }
}
|
||||
|
||||
function Test-JsonStringProperty {
    # True when at least one of the named properties exists on $Json and its
    # string form is not blank.
    param([object]$Json, [string[]]$Names)

    foreach ($candidate in $Names) {
        if ($Json.PSObject.Properties.Name -notcontains $candidate) {
            continue
        }
        $text = [string]$Json.$candidate
        if (-not [string]::IsNullOrWhiteSpace($text)) {
            return $true
        }
    }
    return $false
}
|
||||
|
||||
function Get-JsonStringProperty {
    # String form of property $Name on $Json, or "" when $Json is null/falsy or
    # the property does not exist.
    param([object]$Json, [string]$Name)

    if (-not $Json) {
        return ""
    }
    if ($Json.PSObject.Properties.Name -contains $Name) {
        return [string]$Json.$Name
    }
    return ""
}
|
||||
|
||||
function Invoke-BootstrapHandoffProbe {
    # Real upstream setup-code / bootstrap proof.
    # Runs `openclaw qr --json` inside the distro, classifies the result as
    # UpstreamSetupCode / DirectBootstrapToken / Unknown, and stores the outcome
    # in the summary's pairingValidation section. Captured output files are
    # redacted on disk; only the in-memory copy is parsed.
    # Throws when the command fails, when its output is not valid JSON, or when
    # -RequireRealGatewayBootstrap is set and no setup code was returned.
    New-Item -ItemType Directory -Force -Path $commandsRoot | Out-Null
    $stdout = Join-Path $commandsRoot "wsl-bootstrap-token.stdout.txt"
    $stderr = Join-Path $commandsRoot "wsl-bootstrap-token.stderr.txt"

    # Named $qrArguments rather than $args: $args is an automatic variable.
    $qrArguments = @("-d", $DistroName, "--", "/opt/openclaw/bin/openclaw", "qr", "--json", "--url", $GatewayUrl)
    & wsl.exe @qrArguments > $stdout 2> $stderr
    $exitCode = if ($null -eq $global:LASTEXITCODE) { 0 } else { $global:LASTEXITCODE }

    $raw = if (Test-Path -LiteralPath $stdout) { Read-TextFileWithRetry -Path $stdout -Attempts 20 -DelayMilliseconds 250 } else { "" }
    Write-TextFileWithRetry -Path $stdout -Content (Redact-SensitiveGatewayOutput $raw) -Attempts 20 -DelayMilliseconds 250

    # stderr can echo token material as well; redact it like stdout.
    if (Test-Path -LiteralPath $stderr) {
        $rawError = Read-TextFileWithRetry -Path $stderr -Attempts 20 -DelayMilliseconds 250
        if (-not [string]::IsNullOrEmpty($rawError)) {
            Write-TextFileWithRetry -Path $stderr -Content (Redact-SensitiveGatewayOutput $rawError) -Attempts 20 -DelayMilliseconds 250
        }
    }

    if ($exitCode -ne 0) {
        Add-Step "wsl-bootstrap-token" "Failed" "Gateway QR command failed with exit code $exitCode." @{
            arguments = ($qrArguments -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr
        }
        throw "BootstrapTokenCommandFailed: openclaw qr --json failed. See $stdout and $stderr."
    }

    $hasSetupCode = $false; $hasDirectToken = $false
    try {
        $qr = $raw | ConvertFrom-Json
        $hasSetupCode = Test-JsonStringProperty $qr @("setupCode", "setup_code")
        $hasDirectToken = Test-JsonStringProperty $qr @("bootstrapToken", "bootstrap_token", "token")
    } catch {
        throw "BootstrapTokenJsonInvalid: openclaw qr --json did not produce valid JSON: $($_.Exception.Message)"
    }

    # A setup code wins over a direct token when both are present.
    $shape = if ($hasSetupCode) { "UpstreamSetupCode" } elseif ($hasDirectToken) { "DirectBootstrapToken" } else { "Unknown" }
    $script:summary.pairingValidation["bootstrapQrShape"] = $shape
    $script:summary.pairingValidation["realUpstreamBootstrapHandoff"] = $hasSetupCode

    Add-Step "wsl-bootstrap-token" "Completed" "Gateway QR command completed; bootstrap shape is $shape." @{
        arguments = ($qrArguments -join " "); exitCode = $exitCode; stdout = $stdout; stderr = $stderr; bootstrapQrShape = $shape; realUpstreamBootstrapHandoff = $hasSetupCode
    }

    if ($RequireRealGatewayBootstrap -and -not $hasSetupCode) {
        throw "RealGatewayBootstrapRequired: expected upstream setupCode bootstrap handoff, but openclaw qr --json returned $shape."
    }
}
|
||||
|
||||
# Proves that operator pairing completed. Three checks run in order: the setup
# history must contain the PairOperator phase, Save-RedactedDeviceIdentityShape
# must report success, and a CLI reconnect that requires the stored device
# token must succeed. When -RequireOperatorPairing is not set, a Skipped step
# is recorded and nothing else happens.
function Invoke-OperatorPairingProof {
    $proofStep = "operator-pairing-proof"

    if (-not $RequireOperatorPairing) {
        Add-Step $proofStep "Skipped" "Operator pairing proof was not required."
        return
    }

    # Capture diagnostics before throwing so the failure can be inspected later.
    $phaseRecorded = Test-SetupHistoryPhase -Phase "PairOperator"
    if (-not $phaseRecorded) {
        Save-DiagnosticsSnapshot -Reason "operator-pair-phase-missing"
        throw "OperatorPairingProofFailed: setup state did not record PairOperator."
    }

    $tokenShapeSaved = Save-RedactedDeviceIdentityShape
    if (-not $tokenShapeSaved) {
        Save-DiagnosticsSnapshot -Reason "operator-device-token-missing"
        throw "OperatorPairingProofFailed: stored operator device token is missing."
    }

    # Reconnect through the CLI using only the stored device token.
    $reconnectArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat", "--require-stored-device-token",
        "--connect-timeout-ms", "15000"
    )
    Invoke-LoggedProcess "operator-stored-token-reconnect" "dotnet" $reconnectArguments -Environment (Get-ValidationAppEnvironment) -SensitiveOutput

    $script:summary.pairingValidation["operatorPaired"] = $true
    Add-Step $proofStep "Passed" "Stored operator device token reconnect succeeded."
}
|
||||
|
||||
# Proves that the Windows tray node was paired. The Windows tray itself acts as
# the node, so this confirms that the PairWindowsTrayNode phase was recorded
# and then runs the CLI probe with --require-node. When
# -RequireWindowsNodePairing is not set, a Skipped step is recorded instead.
function Invoke-WindowsNodePairingProof {
    $proofStep = "windows-node-pairing-proof"

    if (-not $RequireWindowsNodePairing) {
        Add-Step $proofStep "Skipped" "Windows tray node pairing proof was not required."
        return
    }

    $phaseRecorded = Test-SetupHistoryPhase -Phase "PairWindowsTrayNode"
    if (-not $phaseRecorded) {
        Save-DiagnosticsSnapshot -Reason "windows-node-pair-phase-missing"
        throw "WindowsNodePairingProofFailed: setup state did not record PairWindowsTrayNode."
    }

    # Uses a 90 s connect timeout, longer than the 15 s used by the other CLI probes.
    $nodeListArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat", "--require-stored-device-token", "--require-node",
        "--connect-timeout-ms", "90000"
    )
    Invoke-LoggedProcess "windows-node-list-proof" "dotnet" $nodeListArguments -Environment (Get-ValidationAppEnvironment) -SensitiveOutput

    $script:summary.pairingValidation["windowsNodePaired"] = $true
    Add-Step $proofStep "Passed" "Gateway node.list returned the Windows tray node."
}
|
||||
|
||||
# Post-install smoke checks, run in order:
#   1. WSL distro listing and loopback network diagnostics.
#   2. Gateway binary checks inside the distro (version, config validate, journal tail).
#   3. HTTP health probe against the URI derived from $GatewayUrl; the response
#      is saved to gateway-health.json and must contain ok=true.
#   4. Bootstrap handoff probe, redacted settings capture, and pairing proofs.
#   5. A final CLI read probe.
# Throws when the health probe fails or when any non-ignored step throws.
function Invoke-SmokeChecks {
    Invoke-LoggedProcess "wsl-list-after" "wsl.exe" @("--list", "--verbose") -IgnoreExitCode
    Save-LoopbackNetworkDiagnostics -Reason "post-install"

    # Gateway in WSL via systemd user unit (UpstreamInstall layout).
    Invoke-LoggedProcess "wsl-openclaw-version" "wsl.exe" @(
        "-d", $DistroName, "-u", "openclaw", "--", "/opt/openclaw/bin/openclaw", "--version")
    Invoke-LoggedProcess "wsl-openclaw-config-validate" "wsl.exe" @(
        "-d", $DistroName, "-u", "openclaw", "--", "/opt/openclaw/bin/openclaw", "config", "validate")
    # NOTE(review): this runs `journalctl --user` as root while the other
    # commands run as the openclaw user; confirm it reads the intended journal.
    Invoke-LoggedProcess "wsl-gateway-journal" "wsl.exe" @(
        "-d", $DistroName, "-u", "root", "--", "journalctl", "--user", "-u", "openclaw-gateway",
        "--no-pager", "-n", "200") -IgnoreExitCode -SensitiveOutput

    # Loopback-only health probe.
    $healthUri = Convert-GatewayUrlToHealthUri -Url $GatewayUrl
    $healthPath = Join-Path $commandsRoot "gateway-health.json"
    try {
        $h = Invoke-RestMethod -Uri $healthUri -TimeoutSec 10
        # Persist the raw response before validating it so a failing payload is still on disk.
        $h | ConvertTo-Json -Depth 10 | Set-Content -LiteralPath $healthPath -Encoding UTF8
        if (-not $h.ok) { throw "Gateway health response did not contain ok=true." }

        # Classify the gateway build from the optional "gateway" object.
        $gw = if ($h.PSObject.Properties.Name -contains "gateway") { $h.gateway } else { $null }
        $version = Get-JsonStringProperty $gw "version"
        $displayName = Get-JsonStringProperty $gw "displayName"
        $isDev = $version -like "*-dev*" -or $displayName -like "Dev OpenClaw*"
        $script:summary.pairingValidation["gatewayImplementation"] = if ($isDev) { "DevShim" } else { "ProductionCandidate" }
        Add-Step "gateway-health" "Passed" "Gateway health endpoint returned ok=true." @{ uri = $healthUri; path = $healthPath }
    } catch {
        # Any failure above, including the ok=true check, is rethrown with the probed URI.
        throw "Gateway health check failed for ${healthUri}: $($_.Exception.Message). Diagnostics: https://aka.ms/wsllogs."
    }

    Invoke-BootstrapHandoffProbe
    Save-RedactedSettings
    Invoke-OperatorPairingProof
    Invoke-WindowsNodePairingProof

    # Use a normal local for the argument list. $args is a PowerShell automatic
    # variable and assigning to it is flagged by PSScriptAnalyzer.
    $probeArguments = @(
        "run", "--project", $cliProject, "--",
        "--probe-read", "--skip-chat",
        "--message", "openclaw validation ping",
        "--connect-timeout-ms", "15000"
    )
    if ($RequireOperatorPairing) { $probeArguments += "--require-stored-device-token" }
    Invoke-LoggedProcess "openclaw-cli-probe" "dotnet" $probeArguments -Environment (Get-ValidationAppEnvironment) -SensitiveOutput
}
|
||||
|
||||
# Stops the WSL keep-alive processes, unregisters the validation distro (exit
# code ignored), and then tries to delete the install location directory.
# A failed directory delete is recorded as a Skipped step rather than thrown.
#   -Reason  Suffix appended to the step names so each call site is identifiable.
function Invoke-DistroUnregisterIfPresent {
    param([string]$Reason)

    Stop-WslKeepAliveProcesses

    # Authoritative repair primitive: `wsl --unregister`. NEVER `wsl --shutdown`.
    Invoke-LoggedProcess "wsl-unregister-$Reason" "wsl.exe" @("--unregister", $DistroName) -IgnoreExitCode

    # Nothing left on disk means nothing more to do.
    if (-not (Test-Path -LiteralPath $wslInstallLocation)) {
        return
    }

    $removeStep = "remove-install-location-$Reason"
    try {
        Remove-Item -LiteralPath $wslInstallLocation -Recurse -Force -ErrorAction Stop
        Add-Step $removeStep "Completed" "Removed install location directory." @{ path = $wslInstallLocation }
    } catch {
        Add-Step $removeStep "Skipped" "Could not remove install location: $($_.Exception.Message)" @{ path = $wslInstallLocation }
    }
}
|
||||
|
||||
# Prepares a clean starting state for a validation iteration.
# For the FreshMachine and Recreate scenarios the distro is unregistered and
# the isolated AppData roots are wiped so the identity store starts empty.
# For every other scenario only the WSL keep-alive processes are stopped.
#   -Index  1-based iteration number, used in step names.
function Invoke-PreIterationCleanup {
    param([int]$Index)

    if ($Scenario -notin @("FreshMachine", "Recreate")) {
        Stop-WslKeepAliveProcesses
        return
    }

    Invoke-DistroUnregisterIfPresent -Reason "iteration-$Index-pre"

    # Wipe isolated AppData so identity store starts empty.
    foreach ($p in @($validationAppDataRoot, $validationLocalAppDataRoot)) {
        if (-not (Test-Path -LiteralPath $p)) { continue }
        try {
            Remove-Item -LiteralPath $p -Recurse -Force -ErrorAction Stop
        } catch {
            # Still best-effort, but leave a trace: stale AppData means the
            # iteration did not actually start from an empty identity store.
            Add-Step "remove-validation-appdata-iteration-$Index-pre" "Skipped" "Could not remove isolated AppData directory: $($_.Exception.Message)" @{ path = $p }
        }
    }
}
|
||||
|
||||
# Cleans up the recreated distro after an iteration and returns the
# per-iteration cleanup status: "Skipped", "Passed", or "Failed".
#
# Cleanup is skipped when the scenario is not Recreate, when a failed iteration
# must be kept for inspection (-KeepFailedDistro), or when a successful
# iteration should be left in place (-CleanupAfterSuccess:$false).
#
# The run-level $script:summary.cleanupStatus is sticky on "Failed": once an
# earlier iteration's cleanup has failed, neither a later skip nor a later
# successful cleanup resets it. The returned per-iteration value is unaffected.
#
#   -Index            1-based iteration number, used in step names.
#   -IterationFailed  Whether the validation part of this iteration failed.
# Rethrows the cleanup error unless -ContinueOnCleanupFailure is set.
function Invoke-PostIterationCleanup {
    param([int]$Index, [bool]$IterationFailed)

    $cleanupStep = "iteration-$Index-cleanup"
    # Captured up front because the status is set to "Running" during cleanup.
    $priorCleanupFailed = ($script:summary.cleanupStatus -eq "Failed")

    if ($Scenario -ne "Recreate") {
        $script:summary.cleanupStatus = if ($priorCleanupFailed) { "Failed" } else { "Skipped" }
        Add-Step $cleanupStep "Skipped" "Post-iteration distro cleanup is only required in Recreate scenario."
        return "Skipped"
    }
    if ($IterationFailed -and $KeepFailedDistro) {
        $script:summary.cleanupStatus = if ($priorCleanupFailed) { "Failed" } else { "Skipped" }
        Add-Step $cleanupStep "Skipped" "Keeping failed WSL distro for inspection (-KeepFailedDistro)." @{ distroName = $DistroName }
        return "Skipped"
    }
    if (-not $IterationFailed -and -not $CleanupAfterSuccess) {
        $script:summary.cleanupStatus = if ($priorCleanupFailed) { "Failed" } else { "Skipped" }
        Add-Step $cleanupStep "Skipped" "Leaving successful distro (-CleanupAfterSuccess:`$false)." @{ distroName = $DistroName }
        return "Skipped"
    }

    try {
        $script:summary.cleanupStatus = "Running"
        Invoke-DistroUnregisterIfPresent -Reason "iteration-$Index-post"
        # Do not let this success hide a cleanup failure from an earlier iteration.
        $script:summary.cleanupStatus = if ($priorCleanupFailed) { "Failed" } else { "Passed" }
        Add-Step $cleanupStep "Passed" "Cleaned recreated WSL distro after validation iteration." @{ distroName = $DistroName }
        return "Passed"
    } catch {
        $script:summary.cleanupStatus = "Failed"
        Add-Step $cleanupStep "Failed" $_.Exception.Message
        if (-not $ContinueOnCleanupFailure) { throw }
        return "Failed"
    }
}
|
||||
|
||||
# Builds the ordered per-iteration record stored in the run summary. The record
# starts with validationStatus "Running", cleanupStatus "NotStarted", and a
# round-trip ("o") formatted start timestamp; error fields and finishedAt are
# left null for the caller to fill in.
#   -Index  1-based iteration number.
function New-IterationRecord {
    param([int]$Index)

    $record = [ordered]@{}
    $record["index"] = $Index
    $record["distroName"] = $DistroName
    $record["installLocation"] = $wslInstallLocation
    $record["validationStatus"] = "Running"
    $record["cleanupStatus"] = "NotStarted"
    $record["error"] = $null
    $record["cleanupError"] = $null
    $record["startedAt"] = (Get-Date).ToString("o")
    $record["finishedAt"] = $null
    return $record
}
|
||||
|
||||
# Runs one full validation iteration: repository validation, pre-iteration
# cleanup, tray local setup, and the smoke checks. The iteration record is
# appended to the run summary first and then updated in place.
#
# On failure the iteration and the run summary are marked "Failed", a
# diagnostics snapshot is saved, and the error is rethrown. The tray process is
# always stopped and post-iteration cleanup always runs; a cleanup error is
# recorded on the iteration and rethrown.
#
# The run-level validation status is sticky on "Failed": a passing iteration
# never resets a failure recorded by an earlier one, so a -ContinueOnFailure
# run with any failed iteration still ends as Failed.
#   -Index  1-based iteration number.
function Invoke-ValidationIteration {
    param([int]$Index)

    $iteration = New-IterationRecord -Index $Index
    $script:summary.iterations += $iteration
    Add-Step "iteration-$Index" "Started" "Starting validation iteration $Index."
    $trayProcess = $null
    $iterationFailed = $false

    try {
        Invoke-RepositoryValidation
        Invoke-PreIterationCleanup -Index $Index
        $trayProcess = Invoke-TrayLocalSetup
        Invoke-SmokeChecks

        Add-Step "iteration-$Index" "Passed" "Validation iteration $Index passed."
        $iteration.validationStatus = "Passed"
        # Only promote the run to Passed when no earlier iteration has failed.
        if ($script:summary.validationStatus -ne "Failed") {
            $script:summary.validationStatus = "Passed"
        }
    } catch {
        $iterationFailed = $true
        $iteration.validationStatus = "Failed"
        $iteration.error = $_.Exception.Message
        $script:summary.validationStatus = "Failed"
        Save-DiagnosticsSnapshot -Reason "iteration-$Index-failed"
        throw
    } finally {
        try {
            Stop-TrayProcess -Process $trayProcess
            $iteration.cleanupStatus = Invoke-PostIterationCleanup -Index $Index -IterationFailed $iterationFailed
        } catch {
            $iteration.cleanupStatus = "Failed"
            $iteration.cleanupError = $_.Exception.Message
            throw
        } finally {
            # Stamp the end time whether or not cleanup succeeded.
            $iteration.finishedAt = (Get-Date).ToString("o")
        }
    }
}
|
||||
|
||||
# ---- Script entry point ----
# Creates the output directories, runs the selected scenario, derives the final
# summary status, always writes the summary, and exits with 0 or 1.
$null = New-Item -ItemType Directory -Force -Path $runRoot, $commandsRoot, $screenshotsRoot

$exitCode = 0
try {
    Assert-DestructiveSafety
    Invoke-Preflight

    if ($Scenario -eq "PreflightOnly") {
        Add-Step "scenario" "Passed" "Preflight completed."
        $script:summary.validationStatus = "Passed"
        $script:summary.cleanupStatus = "Skipped"
    } elseif ($Scenario -eq "Recreate" -or $Iterations -gt 1) {
        if ($Iterations -lt 1) { throw "-Iterations must be at least 1." }
        $i = 1
        while ($i -le $Iterations) {
            try {
                Invoke-ValidationIteration -Index $i
            } catch {
                Add-Step "iteration-$i" "Failed" $_.Exception.Message
                # Without -ContinueOnFailure the first failing iteration aborts the run.
                if (-not $ContinueOnFailure) { throw }
            }
            $i++
        }
    } else {
        # UpstreamInstall or FreshMachine, single shot.
        Invoke-ValidationIteration -Index 1
    }

    # Settle any status still left in a transient state.
    if ($script:summary.validationStatus -eq "Running") {
        $script:summary.validationStatus = "Passed"
    }
    if ($script:summary.cleanupStatus -eq "Running" -or $script:summary.cleanupStatus -eq "NotStarted") {
        $script:summary.cleanupStatus = "Skipped"
    }

    if ($script:summary.validationStatus -eq "Failed") {
        $script:summary.status = "Failed"
        $exitCode = 1
    } elseif ($script:summary.cleanupStatus -eq "Failed") {
        $script:summary.status = "PassedWithCleanupFailure"
    } else {
        $script:summary.status = "Passed"
    }
} catch {
    $script:summary.status = "Failed"
    if ($script:summary.validationStatus -eq "Running") { $script:summary.validationStatus = "Failed" }
    if ($script:summary.cleanupStatus -eq "Running") { $script:summary.cleanupStatus = "Failed" }
    $script:summary.error = $_.Exception.Message
    Add-Step "validation" "Failed" $_.Exception.Message
    $exitCode = 1
} finally {
    Write-Summary
}

Write-Host "Validation summary: $summaryPath"
if ($script:summary.status -eq "Failed") {
    Write-Host "Diagnostics: see https://aka.ms/wsllogs for WSL networking/lifecycle logs."
}
exit $exitCode
|
||||
@ -89,10 +89,9 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)'!='Debug'">
|
||||
<!-- Trimming requires self-contained publish. Only enable when a RID is set
|
||||
(i.e. during `dotnet publish -r <rid>`), otherwise plain `dotnet build -c Release`
|
||||
fails with NETSDK1102. -->
|
||||
<PublishTrimmed Condition="'$(RuntimeIdentifier)' != ''">true</PublishTrimmed>
|
||||
<!-- In Release builds, trimming is enabled by default.
|
||||
feel free to disable this if needed -->
|
||||
<PublishTrimmed>true</PublishTrimmed>
|
||||
|
||||
<!-- In release, also ignore the aforementioned ILLink warning -->
|
||||
<ILLinkTreatWarningsAsErrors>false</ILLinkTreatWarningsAsErrors>
|
||||
|
||||
@ -24,26 +24,6 @@ internal sealed partial class OpenClawPage : ListPage
|
||||
Title = "🦞 Open Dashboard",
|
||||
Subtitle = "Open OpenClaw web dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/sessions"))
|
||||
{
|
||||
Title = "💬 Dashboard: Sessions",
|
||||
Subtitle = "Open the sessions dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/channels"))
|
||||
{
|
||||
Title = "📡 Dashboard: Channels",
|
||||
Subtitle = "Open the channel configuration dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/skills"))
|
||||
{
|
||||
Title = "🧩 Dashboard: Skills",
|
||||
Subtitle = "Open the skills dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://dashboard/cron"))
|
||||
{
|
||||
Title = "⏱️ Dashboard: Cron",
|
||||
Subtitle = "Open the scheduled jobs dashboard"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://chat"))
|
||||
{
|
||||
Title = "💬 Web Chat",
|
||||
@ -54,110 +34,10 @@ internal sealed partial class OpenClawPage : ListPage
|
||||
Title = "📝 Quick Send",
|
||||
Subtitle = "Send a message to OpenClaw"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://setup"))
|
||||
{
|
||||
Title = "🧭 Setup Wizard",
|
||||
Subtitle = "Open QR, setup code, and manual gateway pairing"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://commandcenter"))
|
||||
{
|
||||
Title = "🧭 Command Center",
|
||||
Subtitle = "Open gateway, tunnel, node, and browser diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://healthcheck"))
|
||||
{
|
||||
Title = "🔄 Run Health Check",
|
||||
Subtitle = "Refresh gateway or node connection health"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://check-updates"))
|
||||
{
|
||||
Title = "⬇️ Check for Updates",
|
||||
Subtitle = "Run a manual GitHub Releases update check"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://activity"))
|
||||
{
|
||||
Title = "⚡ Activity Stream",
|
||||
Subtitle = "Open recent tray activity and support bundle actions"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://history"))
|
||||
{
|
||||
Title = "📋 Notification History",
|
||||
Subtitle = "Open recent OpenClaw tray notifications"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://settings"))
|
||||
{
|
||||
Title = "⚙️ Settings",
|
||||
Subtitle = "Configure OpenClaw Tray"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://logs"))
|
||||
{
|
||||
Title = "📄 Open Log File",
|
||||
Subtitle = "Open the current OpenClaw Tray log"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://log-folder"))
|
||||
{
|
||||
Title = "📁 Open Logs Folder",
|
||||
Subtitle = "Open the OpenClaw Tray logs folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://config"))
|
||||
{
|
||||
Title = "🗂️ Open Config Folder",
|
||||
Subtitle = "Open the OpenClaw Tray configuration folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://diagnostics"))
|
||||
{
|
||||
Title = "🧪 Open Diagnostics Folder",
|
||||
Subtitle = "Open the OpenClaw Tray diagnostics JSONL folder"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://support-context"))
|
||||
{
|
||||
Title = "📋 Copy Support Context",
|
||||
Subtitle = "Copy redacted Command Center support metadata"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://debug-bundle"))
|
||||
{
|
||||
Title = "🧰 Copy Debug Bundle",
|
||||
Subtitle = "Copy support context plus port, capability, node, channel, and activity diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://browser-setup"))
|
||||
{
|
||||
Title = "🌐 Copy Browser Setup",
|
||||
Subtitle = "Copy browser.proxy and node-host setup guidance"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://port-diagnostics"))
|
||||
{
|
||||
Title = "🔌 Copy Port Diagnostics",
|
||||
Subtitle = "Copy gateway, browser proxy, tunnel ports, owners, and stop hints"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://capability-diagnostics"))
|
||||
{
|
||||
Title = "🛡️ Copy Capability Diagnostics",
|
||||
Subtitle = "Copy permissions, allowlist health, and parity diagnostics"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://node-inventory"))
|
||||
{
|
||||
Title = "🖥️ Copy Node Inventory",
|
||||
Subtitle = "Copy connected node capabilities, commands, and policy status"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://channel-summary"))
|
||||
{
|
||||
Title = "📡 Copy Channel Summary",
|
||||
Subtitle = "Copy channel health and start/stop availability"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://activity-summary"))
|
||||
{
|
||||
Title = "⚡ Copy Activity Summary",
|
||||
Subtitle = "Copy recent tray activity for troubleshooting"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://extensibility-summary"))
|
||||
{
|
||||
Title = "🧩 Copy Extensibility Summary",
|
||||
Subtitle = "Copy channel, skills, and cron dashboard surface guidance"
|
||||
},
|
||||
new ListItem(new OpenUrlCommand("openclaw://restart-ssh-tunnel"))
|
||||
{
|
||||
Title = "🔁 Restart SSH Tunnel",
|
||||
Subtitle = "Restart the tray-managed SSH tunnel when enabled"
|
||||
}
|
||||
];
|
||||
}
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// One transcribed segment produced by speech-to-text.
/// </summary>
public sealed class TranscriptionResult
{
    /// <summary>Transcribed text; defaults to the empty string.</summary>
    public string Text { get; init; } = string.Empty;

    /// <summary>Start time of the segment.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>End time of the segment.</summary>
    public TimeSpan End { get; init; }

    /// <summary>Language code of the segment; defaults to "en".</summary>
    public string Language { get; init; } = "en";
}
|
||||
|
||||
/// <summary>
/// Combined result for one silence-bounded utterance: every Whisper segment
/// produced from a single VAD-bounded speech burst, merged into one value.
/// Consumers that need the complete "what the user said" text (chat
/// submission, stt.listen) should use this rather than the per-segment
/// <see cref="TranscriptionResult"/>, so that partial text is never sent.
/// </summary>
public sealed class UtteranceResult
{
    /// <summary>Text of all segments joined with single spaces.</summary>
    public string Text { get; init; } = string.Empty;

    /// <summary>Language detected on the first segment; null when there are no segments.</summary>
    public string? Language { get; init; }

    /// <summary>Start of the first segment, relative to capture start.</summary>
    public TimeSpan Start { get; init; }

    /// <summary>End of the last segment, relative to capture start.</summary>
    public TimeSpan End { get; init; }

    /// <summary>Immutable snapshot of the individual segment results; empty by default.</summary>
    public IReadOnlyList<TranscriptionResult> Segments { get; init; } = Array.Empty<TranscriptionResult>();
}
|
||||
|
||||
/// <summary>
/// Event raised by voice-activity detection.
/// </summary>
public sealed class VadEvent
{
    /// <summary>Whether speech is currently detected.</summary>
    public bool IsSpeaking { get; init; }

    /// <summary>Speech probability reported with the event (presumably 0.0–1.0; confirm against the producer).</summary>
    public float Probability { get; init; }
}
|
||||
|
||||
/// <summary>
/// Settings consumed by the audio pipeline.
/// </summary>
public sealed class AudioPipelineOptions
{
    /// <summary>Location of the Whisper GGML model file; empty by default.</summary>
    public string ModelPath { get; init; } = string.Empty;

    /// <summary>STT language code, for example "en" or "auto" (the default).</summary>
    public string Language { get; init; } = "auto";

    /// <summary>How many seconds of silence finalize a speech segment; defaults to 1.5.</summary>
    public float SilenceTimeoutSeconds { get; init; } = 1.5f;

    /// <summary>Audio device ID to capture from; null selects the system default microphone.</summary>
    public string? DeviceId { get; init; }

    /// <summary>VAD probability threshold in the range 0.0–1.0; audio above it counts as speech. Defaults to 0.3.</summary>
    public float VadThreshold { get; init; } = 0.3f;
}
|
||||
|
||||
/// <summary>
/// States the audio pipeline can report.
/// </summary>
public enum AudioPipelineState
{
    /// <summary>Default value (0).</summary>
    Stopped,

    /// <summary>Starting state.</summary>
    Starting,

    /// <summary>Listening state.</summary>
    Listening,

    /// <summary>Processing state.</summary>
    Processing,

    /// <summary>Error state.</summary>
    Error
}
|
||||
@ -1,390 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.IO.Compression;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Manages downloads and on-disk lifecycle for Piper TTS voices.
|
||||
///
|
||||
/// Each "voice" is a sherpa-onnx pre-packaged tarball that contains
|
||||
/// everything needed for offline synthesis — the .onnx model, the
|
||||
/// tokens.txt phoneme map, and the language-specific espeak-ng-data.
|
||||
/// We use the sherpa-onnx repackaged distribution rather than the raw
|
||||
/// HuggingFace Piper voices because the latter requires the user (or
|
||||
/// us) to ship espeak-ng-data separately (~80 MB shared across voices).
|
||||
///
|
||||
/// Storage layout under the tray's data directory:
|
||||
/// models/piper/<voice-id>/
|
||||
/// <voice-id>.onnx
|
||||
/// tokens.txt
|
||||
/// espeak-ng-data/...
|
||||
///
|
||||
/// Each voice is ~50 MB compressed, ~80 MB extracted (with espeak data).
|
||||
///
|
||||
/// Downloaded tarballs are verified against a pinned SHA-256 before
|
||||
/// extraction, and a mismatch aborts the install. **TODO (pre-GA):** re-verify
|
||||
/// the pinned hashes from an independent source (Audio_FollowUps.md §2).
|
||||
/// </summary>
|
||||
public sealed class PiperVoiceManager
|
||||
{
|
||||
private readonly string _voicesDirectory;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
// Per-voice single-flight gate: prevents racing the same voice download
|
||||
// from two callers (e.g. UI and a programmatic caller). Static so two
|
||||
// PiperVoiceManager instances over the same data directory still
|
||||
// coalesce against the same in-flight task.
|
||||
private static readonly ConcurrentDictionary<string, Lazy<Task>> InFlightDownloads = new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
/// <summary>
|
||||
/// Curated catalog of Piper voices we offer in the UI. Each entry is
|
||||
/// a sherpa-onnx pre-packaged tarball from the project's GitHub
|
||||
/// releases. To add a voice: pick its key from
|
||||
/// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models,
|
||||
/// download the tarball, compute its SHA-256, and pin it below.
|
||||
/// Sizes shown in the UI are approximate compressed sizes.
|
||||
///
|
||||
/// SECURITY — pinned SHA-256 hashes (lowercase hex) verified against
|
||||
/// the sherpa-onnx GitHub release on 2026-05-05. Downloads with a
|
||||
/// different hash are rejected and the partial tarball is deleted.
|
||||
/// Before any public release: re-verify each hash from an independent
|
||||
/// source and document provenance in Audio_FollowUps.md §2.
|
||||
/// </summary>
|
||||
public static readonly PiperVoiceInfo[] AvailableVoices =
|
||||
[
|
||||
new("en_US-amy-low", "English (US) — Amy (low quality, fast)", "en-US",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2",
|
||||
"c70f5284a09a7fd4ed203b39b2ff51cac1432b422b852eb647b481dade3cf639"),
|
||||
new("en_US-libritts-high","English (US) — LibriTTS (high quality)", "en-US",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts-high.tar.bz2",
|
||||
"d9d35056703fd38ed38e95c202a50f603fefdc8a92a7b6332c4f1a41616eac72"),
|
||||
new("en_GB-alan-low", "English (GB) — Alan (low quality, fast)", "en-GB",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_GB-alan-low.tar.bz2",
|
||||
"1308e730b7a12c3b64b669d65daa0138fcb83b1a086edee92fa9fa68cb0290dd"),
|
||||
new("fr_FR-siwis-low", "Français (FR) — Siwis (low quality, fast)","fr-FR",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-fr_FR-siwis-low.tar.bz2",
|
||||
"3d69170c160c8375c4123901a72a3845222b39456d39ab74f5bbd7310952b5af"),
|
||||
new("de_DE-thorsten-low","Deutsch (DE) — Thorsten (low quality)", "de-DE",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-de_DE-thorsten-low.tar.bz2",
|
||||
"41fab35910fdcec4696b031951d8fd6c262e594cf77b35e1068fadbeb5a091a6"),
|
||||
new("zh_CN-huayan-medium","中文 (CN) — Huayan (medium quality)", "zh-CN",
|
||||
"https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-zh_CN-huayan-medium.tar.bz2",
|
||||
"dbdfec42b91d9cee31cce9ff4b3e9c305eb6fbf60546d071f7e46273554cce6b"),
|
||||
];
|
||||
|
||||
/// <summary>
/// Creates a manager rooted at <c>models/piper</c> under
/// <paramref name="dataDirectory"/> and makes sure that directory exists.
/// </summary>
/// <param name="dataDirectory">Base data directory that will contain the voices folder.</param>
/// <param name="logger">Logger used for informational messages.</param>
public PiperVoiceManager(string dataDirectory, IOpenClawLogger logger)
{
    (_voicesDirectory, _logger) = (Path.Combine(dataDirectory, "models", "piper"), logger);
    Directory.CreateDirectory(_voicesDirectory);
}
|
||||
|
||||
/// <summary>
/// Returns the directory that holds the files for the given voice. The
/// path is built from the catalog entry's id; this method does not create
/// the directory.
/// </summary>
/// <param name="voiceId">Voice id, resolved through <c>FindVoice</c>.</param>
public string GetVoiceDirectory(string voiceId)
    => Path.Combine(_voicesDirectory, FindVoice(voiceId).VoiceId);
|
||||
|
||||
/// <summary>
/// Returns the path of the .onnx model file for a voice. sherpa-onnx
/// tarballs place their files at the root of the voice directory, and the
/// model file is named after the voice id (as passed in by the caller).
/// </summary>
/// <param name="voiceId">Voice id, resolved through <see cref="GetVoiceDirectory"/>.</param>
public string GetModelPath(string voiceId)
    => Path.Combine(GetVoiceDirectory(voiceId), $"{voiceId}.onnx");
|
||||
|
||||
/// <summary>
/// Returns the path of the voice's <c>tokens.txt</c> phoneme map.
/// </summary>
/// <param name="voiceId">Voice id, resolved through <see cref="GetVoiceDirectory"/>.</param>
public string GetTokensPath(string voiceId)
{
    var voiceDirectory = GetVoiceDirectory(voiceId);
    return Path.Combine(voiceDirectory, "tokens.txt");
}
|
||||
|
||||
/// <summary>
/// Returns the path of the <c>espeak-ng-data</c> directory bundled with the voice.
/// </summary>
/// <param name="voiceId">Voice id, resolved through <see cref="GetVoiceDirectory"/>.</param>
public string GetEspeakDataDir(string voiceId)
{
    var voiceDirectory = GetVoiceDirectory(voiceId);
    return Path.Combine(voiceDirectory, "espeak-ng-data");
}
|
||||
|
||||
/// <summary>
/// Reports whether the voice is fully present on disk: the model file,
/// <c>tokens.txt</c>, and the <c>espeak-ng-data</c> directory must all exist.
/// Any exception raised while resolving the paths yields <c>false</c>.
/// </summary>
/// <param name="voiceId">Voice id to check.</param>
public bool IsVoiceDownloaded(string voiceId)
{
    try
    {
        if (!File.Exists(GetModelPath(voiceId)))
        {
            return false;
        }

        if (!File.Exists(GetTokensPath(voiceId)))
        {
            return false;
        }

        return Directory.Exists(GetEspeakDataDir(voiceId));
    }
    catch
    {
        // FindVoice throws on an unknown voiceId; report that as not downloaded.
        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Downloads and extracts a Piper voice from the sherpa-onnx release.
/// Progress is reported as bytes downloaded against total bytes; the
/// extraction phase reports no progress of its own. Calls are
/// single-flight per voice: a concurrent call for the same voice awaits
/// the download already in progress instead of racing on the temp tarball.
/// </summary>
/// <param name="voiceId">Voice id, resolved through <c>FindVoice</c>.</param>
/// <param name="progress">Optional sink for (downloaded, total) byte counts.</param>
/// <param name="cancellationToken">Token passed to the single-flight runner.</param>
/// <returns>A completed task when the voice is already on disk; otherwise the single-flight download task.</returns>
public Task DownloadVoiceAsync(
    string voiceId,
    IProgress<(long downloaded, long total)>? progress = null,
    CancellationToken cancellationToken = default)
{
    var voice = FindVoice(voiceId);

    if (!IsVoiceDownloaded(voice.VoiceId))
    {
        // Preflight before spending 50-150 MB of bandwidth: tar.exe ships
        // with Windows 10 1803+, and an older system would only fail at the
        // extract step after a long, wasted download.
        EnsureExtractorAvailable();

        return SingleFlightDownload.RunAsync(
            InFlightDownloads,
            voice.VoiceId,
            ct => DownloadVoiceCoreAsync(voice, progress, ct),
            cancellationToken);
    }

    _logger.Info($"Piper voice '{voice.VoiceId}' already downloaded");
    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Performs the actual install of a voice: streams the tarball to a
/// temporary file inside the voice directory, verifies it against the
/// pinned SHA-256, extracts it, and confirms the expected files exist.
/// On any failure an incomplete voice directory is removed (best effort)
/// and the original exception is rethrown; the temporary tarball is
/// always deleted.
/// </summary>
/// <param name="info">Catalog entry supplying the id, download URL, and pinned hash.</param>
/// <param name="progress">
/// Optional sink that receives (bytes downloaded, total bytes) after every
/// chunk; total is 0 when the response has no Content-Length.
/// </param>
/// <param name="cancellationToken">Token observed by the HTTP request, file I/O, hashing, and extraction.</param>
/// <exception cref="InvalidOperationException">
/// The voice has no pinned SHA-256, or extraction did not produce the expected layout.
/// </exception>
private async Task DownloadVoiceCoreAsync(
    PiperVoiceInfo info,
    IProgress<(long downloaded, long total)>? progress,
    CancellationToken cancellationToken)
{
    // SECURITY: refuse to install any voice that doesn't have a pinned
    // hash. See Audio_FollowUps.md §2.
    if (string.IsNullOrWhiteSpace(info.Sha256))
    {
        throw new InvalidOperationException(
            $"Piper voice '{info.VoiceId}' has no pinned SHA-256; refusing to download. " +
            "Add a verified hash to AvailableVoices before enabling this voice.");
    }

    var voiceDir = Path.Combine(_voicesDirectory, info.VoiceId);
    Directory.CreateDirectory(voiceDir);
    var tarballPath = Path.Combine(voiceDir, $"{info.VoiceId}.tar.bz2.tmp");
    _logger.Info($"Downloading Piper voice '{info.VoiceId}' from {info.DownloadUrl}");

    try
    {
        using var httpClient = new HttpClient();
        httpClient.Timeout = TimeSpan.FromMinutes(10);
        using var response = await httpClient.GetAsync(info.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        var totalBytes = response.Content.Headers.ContentLength ?? 0;
        // Both streams are scoped so the file handle is closed before hashing.
        using (var contentStream = await response.Content.ReadAsStreamAsync(cancellationToken).ConfigureAwait(false))
        using (var fileStream = new FileStream(tarballPath, FileMode.Create, FileAccess.Write, FileShare.None, 81920))
        {
            var buffer = new byte[81920];
            long downloaded = 0;
            int bytesRead;
            while ((bytesRead = await contentStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
            {
                await fileStream.WriteAsync(buffer.AsMemory(0, bytesRead), cancellationToken).ConfigureAwait(false);
                downloaded += bytesRead;
                progress?.Report((downloaded, totalBytes));
            }
        }

        // SECURITY: verify the SHA-256 of the downloaded tarball BEFORE it
        // reaches the extractor. tar writes the archive contents to disk, so
        // an attacker-controlled tarball could plant arbitrary files (path
        // traversal aside, the .onnx model itself is loaded into the
        // process). Fail closed on mismatch; the catch block below cleans up.
        await VerifyHashAsync(tarballPath, info.Sha256, info.VoiceId, cancellationToken).ConfigureAwait(false);

        _logger.Info($"Extracting Piper voice '{info.VoiceId}'");
        ExtractTarBz2(tarballPath, voiceDir, cancellationToken);

        // Verify the extraction produced the files we expect; if not, the
        // catch block tears the half-extracted dir down so a retry starts clean.
        if (!IsVoiceDownloaded(info.VoiceId))
        {
            throw new InvalidOperationException(
                $"Extraction of Piper voice '{info.VoiceId}' did not produce the expected layout.");
        }

        _logger.Info($"Piper voice '{info.VoiceId}' verified and ready at {voiceDir}");
    }
    catch
    {
        // Best-effort cleanup so the user can retry without leftover partial
        // files. The temp tarball itself is removed by the finally block.
        try { if (Directory.Exists(voiceDir) && !IsVoiceDownloaded(info.VoiceId)) Directory.Delete(voiceDir, recursive: true); } catch { /* swallow */ }
        throw;
    }
    finally
    {
        try { if (File.Exists(tarballPath)) File.Delete(tarballPath); } catch { /* swallow */ }
    }
}
|
||||
|
||||
/// <summary>
/// Computes the SHA-256 of <paramref name="filePath"/> and compares it,
/// ignoring hex letter case, with <paramref name="expectedHex"/>. The
/// caller is expected to delete the file on failure. The actual hash is
/// deliberately left out of the error message to avoid handing attackers
/// a confirmation oracle.
/// </summary>
/// <param name="filePath">File to hash.</param>
/// <param name="expectedHex">Pinned SHA-256 as a hex string.</param>
/// <param name="assetName">Voice name used in the error message.</param>
/// <param name="cancellationToken">Token observed while hashing.</param>
/// <exception cref="System.Security.SecurityException">The computed hash does not match.</exception>
private static async Task VerifyHashAsync(string filePath, string expectedHex, string assetName, CancellationToken cancellationToken)
{
    byte[] digest;
    var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, useAsync: true);
    // ConfigureAwait(false) on both the dispose and the hash, matching the
    // other awaits in this file: nothing here needs the caller's context.
    await using (stream.ConfigureAwait(false))
    {
        using var sha = System.Security.Cryptography.SHA256.Create();
        digest = await sha.ComputeHashAsync(stream, cancellationToken).ConfigureAwait(false);
    }

    // The comparison is case-insensitive, so no case normalization is needed.
    if (!string.Equals(Convert.ToHexString(digest), expectedHex, StringComparison.OrdinalIgnoreCase))
    {
        throw new System.Security.SecurityException(
            $"Piper voice '{assetName}' failed integrity check. The downloaded tarball does not match the pinned SHA-256.");
    }
}
|
||||
|
||||
/// <summary>Removes the on-disk directory of a downloaded voice.</summary>
/// <param name="voiceId">Catalog id of the voice; resolved through <c>FindVoice</c>.</param>
/// <returns><c>true</c> if the directory existed and was deleted; <c>false</c> if there was nothing to delete.</returns>
public bool DeleteVoice(string voiceId)
{
    var voice = FindVoice(voiceId);
    var voicePath = Path.Combine(_voicesDirectory, voice.VoiceId);

    if (Directory.Exists(voicePath))
    {
        Directory.Delete(voicePath, recursive: true);
        _logger.Info($"Deleted Piper voice '{voice.VoiceId}'");
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Sums the sizes of every file under a voice's directory (recursively).
/// Returns 0 when the directory does not exist.
/// </summary>
/// <param name="voiceId">Catalog id of the voice; resolved through <c>FindVoice</c>.</param>
public long GetVoiceSize(string voiceId)
{
    var voicePath = Path.Combine(_voicesDirectory, FindVoice(voiceId).VoiceId);
    long bytes = 0;

    if (Directory.Exists(voicePath))
    {
        foreach (var file in Directory.EnumerateFiles(voicePath, "*", SearchOption.AllDirectories))
        {
            // A file whose length cannot be read is simply left out of the total.
            try
            {
                bytes += new FileInfo(file).Length;
            }
            catch
            {
                /* skip */
            }
        }
    }

    return bytes;
}
|
||||
|
||||
/// <summary>
/// Checks that a <c>tar</c> executable can be launched by running
/// <c>tar --version</c>, so a missing extractor is reported with a clear
/// error instead of surfacing later from <see cref="ExtractTarBz2"/>.
/// </summary>
/// <exception cref="InvalidOperationException">
/// tar could not be started, did not exit within two seconds, or exited
/// with a non-zero code.
/// </exception>
private static void EnsureExtractorAvailable()
{
    var probe = new System.Diagnostics.ProcessStartInfo
    {
        FileName = "tar",
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardOutput = true,
        RedirectStandardError = true,
    };
    probe.ArgumentList.Add("--version");

    try
    {
        using (var tar = System.Diagnostics.Process.Start(probe))
        {
            if (tar is null)
            {
                throw new InvalidOperationException("tar.exe not found on PATH.");
            }

            tar.WaitForExit(2000);

            if (!tar.HasExited)
            {
                // Still running after the grace period: terminate it and report.
                try
                {
                    tar.Kill(entireProcessTree: true);
                }
                catch
                {
                    /* swallow */
                }

                throw new InvalidOperationException("tar.exe didn't respond to --version.");
            }

            if (tar.ExitCode != 0)
            {
                throw new InvalidOperationException($"tar.exe --version returned exit code {tar.ExitCode}.");
            }
        }
    }
    catch (System.ComponentModel.Win32Exception ex)
    {
        // Process.Start raises Win32Exception when the executable cannot be found.
        throw new InvalidOperationException(
            "Piper voices need bundled tar (Windows 10 1803+). " +
            "Your system doesn't have tar on PATH; please update Windows or install a tar utility.", ex);
    }
}
|
||||
|
||||
/// <summary>
/// Extracts a .tar.bz2 archive into <paramref name="destinationDir"/> by
/// shelling out to the <c>tar</c> executable on PATH
/// (<c>tar -xjf … -C … --strip-components=1</c>).
/// </summary>
/// <param name="archivePath">Path of the .tar.bz2 file to extract.</param>
/// <param name="destinationDir">Directory passed to tar's <c>-C</c> option.</param>
/// <param name="cancellationToken">When signalled, the tar process tree is killed.</param>
/// <exception cref="InvalidOperationException">tar could not be started or exited with a non-zero code.</exception>
/// <exception cref="OperationCanceledException">Extraction failed after <paramref name="cancellationToken"/> was signalled.</exception>
private static void ExtractTarBz2(string archivePath, string destinationDir, CancellationToken cancellationToken)
{
    // bzip2 is not in the BCL (System.Formats.Tar only covers the tar
    // container), and this assembly deliberately keeps its dependency
    // surface small, so extraction is delegated to the OS-bundled `tar`
    // (Win10 1803+ ships it), which handles bz2 transparently.
    var psi = new System.Diagnostics.ProcessStartInfo
    {
        FileName = "tar",
        ArgumentList = { "-xjf", archivePath, "-C", destinationDir, "--strip-components=1" },
        UseShellExecute = false,
        CreateNoWindow = true,
        RedirectStandardError = true,
    };

    using var proc = System.Diagnostics.Process.Start(psi)
        ?? throw new InvalidOperationException("Could not start tar to extract Piper voice");

    // Cancellation: kill the tar process if requested.
    using var reg = cancellationToken.Register(() =>
    {
        try { proc.Kill(entireProcessTree: true); } catch { /* swallow */ }
    });

    // Drain stderr BEFORE waiting for exit. With a redirected stream, waiting
    // first can deadlock: tar blocks writing to a full pipe while we block
    // waiting for tar. ReadToEnd returns once tar closes its end of the pipe
    // (normal exit or kill).
    var err = proc.StandardError.ReadToEnd();
    proc.WaitForExit();

    if (proc.ExitCode != 0)
    {
        // A kill triggered by cancellation also produces a non-zero exit code;
        // report that as cancellation rather than as an extraction failure.
        cancellationToken.ThrowIfCancellationRequested();
        throw new InvalidOperationException($"tar extraction failed (exit {proc.ExitCode}): {err}");
    }
}
|
||||
|
||||
/// <summary>
/// Looks up a catalog entry by voice id (case-insensitive).
/// </summary>
/// <exception cref="ArgumentException">No entry in <c>AvailableVoices</c> has that id; the message lists the known ids.</exception>
private static PiperVoiceInfo FindVoice(string voiceId)
{
    foreach (var candidate in AvailableVoices)
    {
        if (!string.Equals(candidate.VoiceId, voiceId, StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        return candidate;
    }

    var knownIds = string.Join(", ", AvailableVoicesIds());
    throw new ArgumentException($"Unknown Piper voice: '{voiceId}'. Available: {knownIds}");
}
|
||||
|
||||
/// <summary>Lazily yields the <c>VoiceId</c> of every entry in <c>AvailableVoices</c>, in catalog order.</summary>
private static IEnumerable<string> AvailableVoicesIds()
{
    foreach (var voice in AvailableVoices)
    {
        yield return voice.VoiceId;
    }
}
|
||||
}
|
||||
|
||||
/// <summary>Immutable catalog entry describing one Piper voice variant.</summary>
/// <param name="VoiceId">Short identifier, e.g. "en_US-amy-low".</param>
/// <param name="DisplayName">Label shown to users in the UI.</param>
/// <param name="LanguageTag">BCP-47 language tag of the voice.</param>
/// <param name="DownloadUrl">HTTPS location of the voice's .tar.bz2 archive.</param>
/// <param name="Sha256">
/// Pinned lowercase hex SHA-256 of the downloaded tarball. MUST be set;
/// downloads are refused when null. The catalog records the "verified on"
/// date — hashes need re-verification before any public release
/// (see Audio_FollowUps.md §2).
/// </param>
public sealed record PiperVoiceInfo(
    string VoiceId,
    string DisplayName,
    string LanguageTag,
    string DownloadUrl,
    string? Sha256);
|
||||
@ -1,28 +0,0 @@
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Pinned descriptor (file name, URL, hash, size hint) for the Silero VAD
/// ONNX model that the audio pipeline auto-downloads on first use.
///
/// SECURITY — follows the same fail-closed verification discipline as
/// <see cref="WhisperModelManager"/> and <see cref="PiperVoiceManager"/>:
/// the runtime compares the downloaded file's SHA-256 with
/// <see cref="Sha256"/> before installing it. The pinned hash was captured
/// against the upstream raw URL on 2026-05-05; re-verify it from an
/// independent source before any public release (Audio_FollowUps.md §2
/// captures the broader signed-manifest plan).
/// </summary>
public static class SileroVadModelManifest
{
    /// <summary>File name of the model.</summary>
    public const string FileName = "silero_vad.onnx";

    /// <summary>Upstream URL the model is fetched from.</summary>
    public const string DownloadUrl = "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx";

    /// <summary>Lowercase hex SHA-256 of the canonical upstream file.</summary>
    public const string Sha256 = "1a153a22f4509e292a94e67d6f9b85e8deb25b4988682b7e174c65279d8788e3";

    /// <summary>
    /// Approximate size in bytes, used as a UI hint only; integrity is
    /// asserted via the SHA-256 check, not via this number.
    /// </summary>
    public const long ApproximateSizeBytes = 2_327_524;
}
|
||||
@ -1,52 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
/// Coalesces concurrent downloads of the same asset: the first caller for a
/// key starts the work, later callers for that key receive the same task.
/// </summary>
internal static class SingleFlightDownload
{
    /// <summary>
    /// Returns the in-flight task registered under <paramref name="key"/>,
    /// starting it via <paramref name="startDownload"/> if none exists.
    /// </summary>
    /// <param name="inFlight">Shared registry of running downloads; the entry is removed when the task completes.</param>
    /// <param name="key">Identity of the asset being downloaded.</param>
    /// <param name="startDownload">
    /// Factory for the shared work. It is invoked with
    /// <see cref="CancellationToken.None"/>, so one waiter cancelling does
    /// not abort the download for the others. A thrown exception or a null
    /// return is converted into a faulted task.
    /// </param>
    /// <param name="waitCancellationToken">Cancels only this caller's wait, not the shared download.</param>
    public static Task RunAsync(
        ConcurrentDictionary<string, Lazy<Task>> inFlight,
        string key,
        Func<CancellationToken, Task> startDownload,
        CancellationToken waitCancellationToken = default)
    {
        var shared = inFlight.GetOrAdd(
            key,
            new Lazy<Task>(StartGuarded, LazyThreadSafetyMode.ExecutionAndPublication));

        // Remove by key AND value so a newer flight registered under the same
        // key is never evicted by an older one finishing.
        var entry = new KeyValuePair<string, Lazy<Task>>(key, shared);

        Task download;
        try
        {
            download = shared.Value;
        }
        catch
        {
            inFlight.TryRemove(entry);
            throw;
        }

        _ = download.ContinueWith(
            _ => inFlight.TryRemove(entry),
            CancellationToken.None,
            TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);

        if (!waitCancellationToken.CanBeCanceled)
        {
            return download;
        }

        return download.WaitAsync(waitCancellationToken);

        // Runs at most once per Lazy; never throws, failures become faulted tasks.
        Task StartGuarded()
        {
            try
            {
                return startDownload(CancellationToken.None)
                    ?? Task.FromException(new InvalidOperationException("Download factory returned null."));
            }
            catch (Exception ex)
            {
                return Task.FromException(ex);
            }
        }
    }
}
|
||||
@ -1,182 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Whisper.net;
|
||||
using Whisper.net.Ggml;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Wraps Whisper.net for speech-to-text transcription.
|
||||
/// Lazily loads the model on first use and caches the factory.
|
||||
/// Thread-safe: concurrent calls are serialized by a semaphore.
|
||||
/// </summary>
|
||||
public sealed class SpeechToTextService : IDisposable
|
||||
{
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly SemaphoreSlim _gate = new(1, 1);
|
||||
private WhisperFactory? _factory;
|
||||
private string? _loadedModelPath;
|
||||
|
||||
public bool IsModelLoaded => _factory != null;
|
||||
public string? LoadedModelPath => _loadedModelPath;
|
||||
|
||||
public SpeechToTextService(IOpenClawLogger logger)
|
||||
{
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
/// <summary>
/// Load (or reload) the Whisper model from disk, replacing any model that
/// is currently loaded.
/// </summary>
/// <param name="modelPath">Path of the model file.</param>
/// <exception cref="System.IO.FileNotFoundException"><paramref name="modelPath"/> does not exist.</exception>
/// <remarks>
/// If creating the new factory fails, the service is left with no model
/// loaded rather than with a reference to the already-disposed one.
/// NOTE(review): this method does not take the transcription gate, so it
/// is not synchronized with an in-flight TranscribeAsync call.
/// </remarks>
public void LoadModel(string modelPath)
{
    if (!System.IO.File.Exists(modelPath))
        throw new System.IO.FileNotFoundException($"Whisper model not found: {modelPath}");

    // Release the previous model and clear the fields BEFORE loading the new
    // one: peak memory stays at one model, and a failing FromPath cannot leave
    // _factory pointing at a disposed instance (IsModelLoaded would lie).
    _factory?.Dispose();
    _factory = null;
    _loadedModelPath = null;

    _factory = WhisperFactory.FromPath(modelPath);
    _loadedModelPath = modelPath;
    _logger.Info($"Whisper model loaded: {modelPath}");
}
|
||||
|
||||
/// <summary>
/// Disposes the loaded model (if any) and clears the loaded-model state.
/// Logs even when nothing was loaded.
/// </summary>
public void UnloadModel()
{
    if (_factory is { } loaded)
    {
        loaded.Dispose();
    }

    _factory = null;
    _loadedModelPath = null;
    _logger.Info("Whisper model unloaded");
}
|
||||
|
||||
/// <summary>
/// Transcribe 16 kHz mono float samples with the loaded Whisper model and
/// return every non-empty segment. Concurrent calls are serialized by the
/// internal gate.
/// </summary>
/// <param name="samples">Mono samples at 16 kHz; values are clamped to [-1, 1] during WAV conversion.</param>
/// <param name="language">"auto" or a BCP-47 tag; reduced to what Whisper accepts by <see cref="NormalizeForWhisper"/>.</param>
/// <param name="cancellationToken">Cancels waiting for the gate and the transcription itself.</param>
/// <returns>Segments whose trimmed text is non-empty, each tagged with the normalized language value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
/// <exception cref="InvalidOperationException">No model is loaded.</exception>
public async Task<List<TranscriptionResult>> TranscribeAsync(
    float[] samples,
    string language = "auto",
    CancellationToken cancellationToken = default)
{
    ArgumentNullException.ThrowIfNull(samples);

    // Fast-fail before queueing on the gate.
    if (_factory == null)
        throw new InvalidOperationException("No Whisper model is loaded. Call LoadModel first.");

    await _gate.WaitAsync(cancellationToken);
    try
    {
        // Re-read the field now that we hold the gate: the model may have been
        // unloaded while this call was waiting. Using a local also guarantees
        // the null check and the use refer to the same instance.
        var factory = _factory
            ?? throw new InvalidOperationException("No Whisper model is loaded. Call LoadModel first.");

        // Whisper.net's WithLanguage expects either "auto" or a 2-letter
        // ISO 639-1 code. The capability validator accepts the broader
        // BCP-47 shape ("en-US", "zh-Hans-CN") because that's what the
        // public docs advertise; normalize down here so Whisper actually
        // sees something it understands.
        var whisperLang = NormalizeForWhisper(language);
        var builder = factory.CreateBuilder()
            .WithLanguage(whisperLang)
            .WithThreads(Math.Max(1, Environment.ProcessorCount / 2));

        using var processor = builder.Build();
        using var wavStream = PcmToWavStream(samples, 16000);

        var results = new List<TranscriptionResult>();
        await foreach (var segment in processor.ProcessAsync(wavStream, cancellationToken))
        {
            var text = segment.Text?.Trim();
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            results.Add(new TranscriptionResult
            {
                Text = text,
                Start = segment.Start,
                End = segment.End,
                Language = whisperLang
            });
        }

        return results;
    }
    finally
    {
        _gate.Release();
    }
}
|
||||
|
||||
/// <summary>
/// Encode float samples as an in-memory 16-bit PCM mono WAV file
/// (44-byte RIFF header followed by little-endian int16 samples).
/// </summary>
/// <param name="samples">Samples nominally in [-1.0, 1.0]; values outside that range are clamped.</param>
/// <param name="sampleRate">Sample rate written into the header, in Hz.</param>
/// <returns>A stream positioned at offset 0; the caller owns and disposes it.</returns>
/// <exception cref="OverflowException">The encoded size does not fit the 32-bit WAV size fields.</exception>
private static System.IO.MemoryStream PcmToWavStream(float[] samples, int sampleRate)
{
    const short channels = 1;
    const short bitsPerSample = 16;
    const short blockAlign = channels * bitsPerSample / 8;
    const int headerSize = 44;

    // checked: an oversized input must fail loudly instead of wrapping around
    // and producing a header with bogus sizes.
    int byteRate = checked(sampleRate * blockAlign);
    int dataSize = checked(samples.Length * blockAlign);
    int riffSize = checked(36 + dataSize);

    // The final size is known up front, so presize to avoid regrowth copies.
    var ms = new System.IO.MemoryStream(checked(headerSize + dataSize));
    using var writer = new System.IO.BinaryWriter(ms, System.Text.Encoding.UTF8, leaveOpen: true);

    // RIFF header
    writer.Write("RIFF"u8);
    writer.Write(riffSize);
    writer.Write("WAVE"u8);

    // fmt subchunk
    writer.Write("fmt "u8);
    writer.Write(16);          // subchunk size
    writer.Write((short)1);    // PCM format
    writer.Write(channels);
    writer.Write(sampleRate);
    writer.Write(byteRate);
    writer.Write(blockAlign);
    writer.Write(bitsPerSample);

    // data subchunk
    writer.Write("data"u8);
    writer.Write(dataSize);

    // Convert float [-1.0, 1.0] to int16
    foreach (var sample in samples)
    {
        var clamped = Math.Clamp(sample, -1.0f, 1.0f);
        writer.Write((short)(clamped * 32767));
    }

    writer.Flush();
    ms.Position = 0;
    return ms;
}
|
||||
|
||||
/// <summary>
/// Maps a caller-supplied language value to what Whisper.net's
/// WithLanguage accepts: "auto" or a lowercase two-letter code.
/// The primary subtag (text before the first '-') of a BCP-47 tag such as
/// "en-US" or "zh-Hans-CN" is used. Null, blank, "auto" (any casing), and
/// anything whose primary subtag is not exactly two ASCII letters all
/// yield "auto".
/// </summary>
internal static string NormalizeForWhisper(string? language)
{
    const string autoDetect = "auto";

    if (string.IsNullOrWhiteSpace(language))
    {
        return autoDetect;
    }

    var tag = language.Trim();
    if (tag.Equals(autoDetect, StringComparison.OrdinalIgnoreCase))
    {
        return autoDetect;
    }

    // Keep only the primary subtag, lower-cased.
    var separator = tag.IndexOf('-');
    var primary = separator < 0 ? tag : tag.Substring(0, separator);
    primary = primary.ToLowerInvariant();

    // Three-letter ISO 639-3 codes (no cross-walk table here) and garbage
    // fall back to auto-detection instead of sending Whisper an invalid value.
    var isTwoLetterCode = primary.Length == 2 && IsLowerAscii(primary[0]) && IsLowerAscii(primary[1]);
    return isTwoLetterCode ? primary : autoDetect;

    static bool IsLowerAscii(char c) => c >= 'a' && c <= 'z';
}
|
||||
|
||||
/// <summary>Disposes the loaded Whisper factory (if any) and then the serialization gate.</summary>
public void Dispose()
{
    if (_factory is { } loaded)
    {
        loaded.Dispose();
    }

    _gate.Dispose();
}
|
||||
}
|
||||
@ -1,108 +0,0 @@
|
||||
using System;
|
||||
using Microsoft.ML.OnnxRuntime;
|
||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Voice Activity Detection using Silero VAD ONNX model.
|
||||
/// Processes 16 kHz mono audio in 512-sample chunks (~32 ms each)
|
||||
/// and returns a speech probability per chunk.
|
||||
/// </summary>
|
||||
public sealed class VoiceActivityDetector : IDisposable
|
||||
{
|
||||
private InferenceSession? _session;
|
||||
private float[] _state; // internal RNN state: shape [2, 1, 128]
|
||||
private readonly int _stateSize;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
|
||||
/// <summary>Expected sample rate for input audio.</summary>
|
||||
public const int SampleRate = 16000;
|
||||
|
||||
/// <summary>Number of samples per VAD chunk (512 @ 16 kHz = 32 ms).</summary>
|
||||
public const int ChunkSamples = 512;
|
||||
|
||||
public bool IsLoaded => _session != null;
|
||||
|
||||
public VoiceActivityDetector(IOpenClawLogger logger)
|
||||
{
|
||||
_logger = logger;
|
||||
_stateSize = 2 * 1 * 128;
|
||||
_state = new float[_stateSize];
|
||||
}
|
||||
|
||||
/// <summary>
/// Load the Silero VAD ONNX model from disk, replacing any previously
/// loaded session, and reset the recurrent state.
/// </summary>
/// <param name="modelPath">Path of the .onnx model file.</param>
/// <exception cref="System.IO.FileNotFoundException"><paramref name="modelPath"/> does not exist.</exception>
/// <remarks>
/// If creating the new session fails, the detector is left unloaded rather
/// than holding a reference to the already-disposed session.
/// </remarks>
public void LoadModel(string modelPath)
{
    if (!System.IO.File.Exists(modelPath))
        throw new System.IO.FileNotFoundException($"VAD model not found: {modelPath}");

    // SessionOptions is disposable; dispose it once the session has been
    // constructed instead of leaving it to the finalizer.
    using var opts = new SessionOptions
    {
        InterOpNumThreads = 1,
        IntraOpNumThreads = 1,
        EnableCpuMemArena = true,
        GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_ALL
    };

    // Clear the field before constructing the replacement so a throwing
    // constructor cannot leave IsLoaded reporting a disposed session.
    _session?.Dispose();
    _session = null;

    _session = new InferenceSession(modelPath, opts);
    ResetState();
    _logger.Info($"Silero VAD model loaded: {modelPath}");
}
|
||||
|
||||
/// <summary>Zeroes the recurrent state buffer; call between utterances.</summary>
public void ResetState()
{
    _state.AsSpan().Clear();
}
|
||||
|
||||
/// <summary>
/// Runs one chunk of audio through the model and returns the value of the
/// model's "output" tensor as the speech probability (0.0–1.0). The
/// recurrent state is updated in place from the "stateN" output.
/// </summary>
/// <param name="audioChunk">Exactly <see cref="ChunkSamples"/> float samples at 16 kHz.</param>
/// <exception cref="InvalidOperationException">No model has been loaded.</exception>
/// <exception cref="ArgumentException"><paramref name="audioChunk"/> has the wrong length.</exception>
public float ProcessChunk(float[] audioChunk)
{
    var session = _session;
    if (session is null)
    {
        throw new InvalidOperationException("VAD model not loaded. Call LoadModel first.");
    }

    if (audioChunk.Length != ChunkSamples)
    {
        throw new ArgumentException($"Audio chunk must be exactly {ChunkSamples} samples, got {audioChunk.Length}");
    }

    // Build input tensors matching Silero VAD v5 expected shapes.
    // See: github.com/snakers4/silero-vad/blob/master/examples/csharp/SileroVadOnnxModel.cs
    var inputs = new NamedOnnxValue[]
    {
        NamedOnnxValue.CreateFromTensor("input", new DenseTensor<float>(audioChunk, new[] { 1, ChunkSamples })),
        NamedOnnxValue.CreateFromTensor("sr", new DenseTensor<long>(new long[] { SampleRate }, new[] { 1 })),
        NamedOnnxValue.CreateFromTensor("state", new DenseTensor<float>(_state, new[] { 2, 1, 128 })),
    };

    using var outputs = session.Run(inputs);

    var speechProbability = 0f;
    foreach (var output in outputs)
    {
        switch (output.Name)
        {
            case "output":
            {
                var scores = output.AsTensor<float>();
                speechProbability = scores.Length > 0 ? scores.GetValue(0) : 0f;
                break;
            }

            case "stateN":
            {
                // Carry the recurrent state over to the next chunk.
                var nextState = output.AsTensor<float>();
                for (int i = 0; i < _stateSize && i < nextState.Length; i++)
                {
                    _state[i] = nextState.GetValue(i);
                }

                break;
            }
        }
    }

    return speechProbability;
}
|
||||
|
||||
/// <summary>Disposes the ONNX inference session, if one was loaded.</summary>
public void Dispose()
{
    if (_session is { } session)
    {
        session.Dispose();
    }
}
|
||||
}
|
||||
@ -1,223 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Audio;
|
||||
|
||||
/// <summary>
|
||||
/// Manages Whisper GGML model downloads, storage, and lifecycle.
|
||||
/// Models are stored in <c>%APPDATA%\OpenClawTray\models\</c> (or the
|
||||
/// configured data directory).
|
||||
/// </summary>
|
||||
public sealed class WhisperModelManager
|
||||
{
|
||||
private readonly string _modelsDirectory;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
// Per-model single-flight gate: a manual auto-download (VoiceService
|
||||
// EnsureInitializedAsync) and a UI-triggered download for the same
|
||||
// model would otherwise both write the same .tmp file. Static so an
|
||||
// additional manager instance constructed elsewhere (e.g. the Settings
|
||||
// page's status-only check) doesn't bypass the lock.
|
||||
private static readonly ConcurrentDictionary<string, Lazy<Task>> InFlightDownloads = new(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
/// <summary>
|
||||
/// Known Whisper model definitions.
|
||||
///
|
||||
/// SECURITY — pinned SHA-256 hashes (lowercase hex) verified against
|
||||
/// HuggingFace on 2026-05-05. Downloads with a different hash are
|
||||
/// rejected and the partial file is deleted. Before any public release:
|
||||
/// re-verify each hash from an independent source and document the
|
||||
/// provenance in Audio_FollowUps.md §2 (also consider replacing this
|
||||
/// inline table with a signed manifest).
|
||||
/// </summary>
|
||||
public static readonly WhisperModelInfo[] AvailableModels =
|
||||
[
|
||||
new("ggml-tiny.bin", "tiny", 77_691_713, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin",
|
||||
"be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21"),
|
||||
new("ggml-base.bin", "base", 147_951_465, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin",
|
||||
"60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe"),
|
||||
new("ggml-small.bin", "small", 487_601_967, "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
|
||||
"1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b"),
|
||||
];
|
||||
|
||||
public WhisperModelManager(string dataDirectory, IOpenClawLogger logger)
|
||||
{
|
||||
_modelsDirectory = Path.Combine(dataDirectory, "models");
|
||||
_logger = logger;
|
||||
Directory.CreateDirectory(_modelsDirectory);
|
||||
}
|
||||
|
||||
/// <summary>Resolves a model name to its full path inside the models directory.</summary>
/// <exception cref="ArgumentException">The name is not in the catalog (thrown by <c>FindModel</c>).</exception>
public string GetModelPath(string modelName)
    => Path.Combine(_modelsDirectory, FindModel(modelName).FileName);
|
||||
|
||||
/// <summary>Reports whether the model's file is present on disk.</summary>
public bool IsModelDownloaded(string modelName)
    => File.Exists(GetModelPath(modelName));
|
||||
|
||||
/// <summary>Returns the on-disk size of a model in bytes, or 0 when its file does not exist.</summary>
public long GetModelSize(string modelName)
{
    var modelFile = GetModelPath(modelName);
    if (!File.Exists(modelFile))
    {
        return 0;
    }

    return new FileInfo(modelFile).Length;
}
|
||||
|
||||
/// <summary>
/// Downloads a model if its file is not already on disk.
/// Per-model single-flight: concurrent calls for the same model share one
/// download instead of racing on the same .tmp file.
/// </summary>
/// <param name="modelName">Catalog name of the model.</param>
/// <param name="progress">
/// Receives (bytes downloaded, total bytes). Only the call that actually
/// starts the download has its callback invoked; calls that join an
/// in-flight download receive no progress reports.
/// </param>
/// <param name="cancellationToken">
/// Cancels this caller's wait. The shared download itself is started with
/// <see cref="CancellationToken.None"/> by <see cref="SingleFlightDownload"/>.
/// </param>
/// <exception cref="ArgumentException">The name is not in the catalog.</exception>
public Task DownloadModelAsync(
    string modelName,
    IProgress<(long downloaded, long total)>? progress = null,
    CancellationToken cancellationToken = default)
{
    var model = FindModel(modelName);
    var targetPath = Path.Combine(_modelsDirectory, model.FileName);

    if (!File.Exists(targetPath))
    {
        // Key on the canonical FileName so every spelling that FindModel
        // resolves to the same model coalesces onto one download.
        return SingleFlightDownload.RunAsync(
            InFlightDownloads,
            model.FileName,
            token => DownloadModelCoreAsync(model, targetPath, progress, token),
            cancellationToken);
    }

    _logger.Info($"Model '{modelName}' already exists at {targetPath}");
    return Task.CompletedTask;
}
|
||||
|
||||
/// <summary>
/// Performs the download: streams the model to "<paramref name="destPath"/>.tmp",
/// verifies its SHA-256, then moves it to <paramref name="destPath"/>.
/// On any failure the temp file is deleted (best effort) and the exception
/// is rethrown.
/// </summary>
/// <param name="info">Catalog entry; must carry a pinned SHA-256.</param>
/// <param name="destPath">Final location of the verified model file.</param>
/// <param name="progress">Optional sink for (bytes downloaded, total bytes).</param>
/// <param name="cancellationToken">Cancels the HTTP request, the copy loop and the hash check.</param>
/// <exception cref="InvalidOperationException"><paramref name="info"/> has no pinned hash.</exception>
private async Task DownloadModelCoreAsync(
    WhisperModelInfo info,
    string destPath,
    IProgress<(long downloaded, long total)>? progress,
    CancellationToken cancellationToken)
{
    // SECURITY: a missing pinned hash is a hard failure, so an unverified
    // asset is never installed. Adding a catalog entry without a hash ends
    // up here and is refused. See Audio_FollowUps.md §2.
    if (string.IsNullOrWhiteSpace(info.Sha256))
    {
        throw new InvalidOperationException(
            $"Whisper model '{info.Name}' has no pinned SHA-256; refusing to download. " +
            "Add a verified hash to AvailableModels before enabling this model.");
    }

    _logger.Info($"Downloading model '{info.Name}' from {info.DownloadUrl}");
    var partialPath = destPath + ".tmp";

    try
    {
        using var client = new HttpClient { Timeout = TimeSpan.FromMinutes(30) };
        using var response = await client.GetAsync(info.DownloadUrl, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
        response.EnsureSuccessStatusCode();

        // Fall back to the catalog's size hint when the server sends no Content-Length.
        long expectedTotal = response.Content.Headers.ContentLength ?? info.ApproximateSizeBytes;

        // Scoped usings: the temp file must be closed before it is hashed and moved.
        using (var body = await response.Content.ReadAsStreamAsync(cancellationToken))
        using (var output = new FileStream(partialPath, FileMode.Create, FileAccess.Write, FileShare.None, 81920))
        {
            var chunk = new byte[81920];
            long written = 0;

            for (; ; )
            {
                int count = await body.ReadAsync(chunk, cancellationToken);
                if (count <= 0)
                {
                    break;
                }

                await output.WriteAsync(chunk.AsMemory(0, count), cancellationToken);
                written += count;
                progress?.Report((written, expectedTotal));
            }

            await output.FlushAsync(cancellationToken);
        }

        // SECURITY: verify the SHA-256 BEFORE the rename so a tampered file
        // never lands at the canonical path. The error raised on mismatch is
        // sanitized — it deliberately does not echo the actual hash, which
        // would give an attacker a confirmation oracle.
        await VerifyHashAsync(partialPath, info.Sha256, info.Name, cancellationToken);

        File.Move(partialPath, destPath, overwrite: true);
        _logger.Info($"Model '{info.Name}' downloaded and verified");
    }
    catch
    {
        // Remove the partial download; failures here are ignored.
        try
        {
            if (File.Exists(partialPath))
            {
                File.Delete(partialPath);
            }
        }
        catch
        {
            /* best effort */
        }

        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Hashes <paramref name="filePath"/> with SHA-256 and checks the result
/// against <paramref name="expectedHex"/> (case-insensitive). A mismatch
/// throws; the caller is expected to delete the file. The actual hash is
/// not included in the error, to avoid handing attackers a confirmation
/// oracle.
/// </summary>
/// <param name="filePath">File to hash.</param>
/// <param name="expectedHex">Pinned SHA-256 as a hex string.</param>
/// <param name="assetName">Model name used only in the error message.</param>
/// <param name="cancellationToken">Cancels the hashing read.</param>
/// <exception cref="System.Security.SecurityException">The hashes differ.</exception>
private static async Task VerifyHashAsync(string filePath, string expectedHex, string assetName, CancellationToken cancellationToken)
{
    using var hasher = System.Security.Cryptography.SHA256.Create();
    await using var input = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, 81920, useAsync: true);

    byte[] digest = await hasher.ComputeHashAsync(input, cancellationToken);
    string digestHex = Convert.ToHexString(digest).ToLowerInvariant();

    bool matches = string.Equals(digestHex, expectedHex, StringComparison.OrdinalIgnoreCase);
    if (matches)
    {
        return;
    }

    throw new System.Security.SecurityException(
        $"Whisper model '{assetName}' failed integrity check. The downloaded file does not match the pinned SHA-256.");
}
|
||||
|
||||
/// <summary>Deletes a model's file from disk.</summary>
/// <returns><c>true</c> if the file existed and was deleted; <c>false</c> if there was nothing to delete.</returns>
public bool DeleteModel(string modelName)
{
    var modelFile = GetModelPath(modelName);

    if (File.Exists(modelFile))
    {
        File.Delete(modelFile);
        _logger.Info($"Deleted model '{modelName}'");
        return true;
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Resolves a caller-supplied identifier to a catalog entry. Matches the
/// short name ("base") or the on-disk file name ("ggml-base.bin"),
/// case-insensitively.
/// </summary>
/// <param name="modelName">Short model name or model file name.</param>
/// <exception cref="ArgumentException">
/// No entry in <see cref="AvailableModels"/> matches; the message lists the
/// short names currently in the catalog.
/// </exception>
private static WhisperModelInfo FindModel(string modelName)
{
    foreach (var m in AvailableModels)
    {
        if (string.Equals(m.Name, modelName, StringComparison.OrdinalIgnoreCase) ||
            string.Equals(m.FileName, modelName, StringComparison.OrdinalIgnoreCase))
        {
            return m;
        }
    }

    // Derive the list from the catalog so the message cannot go stale when
    // models are added or removed.
    var available = string.Join(", ", Array.ConvertAll(AvailableModels, m => m.Name));
    throw new ArgumentException($"Unknown model: '{modelName}'. Available: {available}");
}
|
||||
}
|
||||
|
||||
/// <summary>Immutable catalog entry describing one Whisper model variant.</summary>
/// <param name="FileName">Name of the model file on disk (e.g. "ggml-base.bin").</param>
/// <param name="Name">Short identifier callers use ("tiny" / "base" / "small").</param>
/// <param name="ApproximateSizeBytes">
/// Size hint for the UI and for progress reporting when the server sends no
/// length; the download's integrity is asserted against
/// <paramref name="Sha256"/>, not against this number.
/// </param>
/// <param name="DownloadUrl">HTTPS location of the model file.</param>
/// <param name="Sha256">
/// Pinned lowercase hex SHA-256 of the downloaded file. MUST be set;
/// downloads are refused when null. The catalog records the "verified on"
/// date — hashes need re-verification before any public release
/// (see Audio_FollowUps.md §2).
/// </param>
public sealed record WhisperModelInfo(
    string FileName,
    string Name,
    long ApproximateSizeBytes,
    string DownloadUrl,
    string? Sha256);
|
||||
@ -1,154 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// App-level capability exposing navigation, status, and configuration
/// through the MCP server for programmatic testing and CLI agents.
/// </summary>
/// <remarks>
/// Every command is forwarded to a handler delegate assigned by the host.
/// A command whose handler has not been assigned yields an error response.
/// </remarks>
public class AppCapability : NodeCapabilityBase
{
    private static readonly string[] _commands = new[]
    {
        "app.navigate",
        "app.status",
        "app.sessions",
        "app.agents",
        "app.nodes",
        "app.config.get",
        "app.settings.get",
        "app.settings.set",
        "app.menu",
        "app.search",
    };

    public override string Category => "app";

    public override IReadOnlyList<string> Commands => _commands;

    // Handler delegates — wired up by App.xaml.cs after construction.
    public Func<string, Task<object?>>? NavigateHandler;
    public Func<object?>? StatusHandler;
    public Func<string?, Task<object?>>? SessionsHandler;
    public Func<Task<object?>>? AgentsHandler;
    public Func<object?>? NodesHandler;
    public Func<string?, Task<object?>>? ConfigGetHandler;
    public Func<string, object?>? SettingsGetHandler;
    public Func<string, string, object?>? SettingsSetHandler;
    public Func<object?>? MenuHandler;
    public Func<string, object?>? SearchHandler;

    public AppCapability(IOpenClawLogger logger) : base(logger) { }

    /// <summary>
    /// Dispatches <paramref name="request"/> to the handler matching its command.
    /// </summary>
    /// <param name="request">The invocation; <c>Command</c> selects the handler and <c>Args</c> carries its arguments.</param>
    /// <returns>The handler's result wrapped in a success response, or an error response
    /// for an unknown command, a missing required argument, or an unregistered handler.</returns>
    public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
    {
        return request.Command switch
        {
            "app.navigate" => await HandleNavigate(request),
            "app.status" => HandleStatus(),
            "app.sessions" => await HandleSessions(request),
            "app.agents" => await HandleAgents(),
            "app.nodes" => HandleNodes(),
            "app.config.get" => await HandleConfigGet(request),
            "app.settings.get" => HandleSettingsGet(request),
            "app.settings.set" => HandleSettingsSet(request),
            "app.menu" => HandleMenu(),
            "app.search" => HandleSearch(request),
            _ => Error($"Unknown command: {request.Command}")
        };
    }

    // NOTE: each helper below copies its handler field into a local before the
    // null check, so the check and the invocation always see the same delegate
    // even if the field is reassigned concurrently.

    /// <summary>Handles <c>app.navigate</c>; requires a non-empty <c>page</c> argument.</summary>
    private async Task<NodeInvokeResponse> HandleNavigate(NodeInvokeRequest request)
    {
        var page = GetStringArg(request.Args, "page");
        if (string.IsNullOrEmpty(page))
        {
            return Error("Missing required arg: page");
        }

        if (NavigateHandler is not { } handler)
        {
            return Error("Navigate handler not registered");
        }

        return Success(await handler(page));
    }

    /// <summary>Handles <c>app.status</c>; takes no arguments.</summary>
    private NodeInvokeResponse HandleStatus()
    {
        if (StatusHandler is not { } handler)
        {
            return Error("Status handler not registered");
        }

        return Success(handler());
    }

    /// <summary>Handles <c>app.sessions</c>; the <c>agentId</c> argument is optional and passed through as-is.</summary>
    private async Task<NodeInvokeResponse> HandleSessions(NodeInvokeRequest request)
    {
        var agentId = GetStringArg(request.Args, "agentId");
        if (SessionsHandler is not { } handler)
        {
            return Error("Sessions handler not registered");
        }

        return Success(await handler(agentId));
    }

    /// <summary>Handles <c>app.agents</c>; takes no arguments.</summary>
    private async Task<NodeInvokeResponse> HandleAgents()
    {
        if (AgentsHandler is not { } handler)
        {
            return Error("Agents handler not registered");
        }

        return Success(await handler());
    }

    /// <summary>Handles <c>app.nodes</c>; takes no arguments.</summary>
    private NodeInvokeResponse HandleNodes()
    {
        if (NodesHandler is not { } handler)
        {
            return Error("Nodes handler not registered");
        }

        return Success(handler());
    }

    /// <summary>Handles <c>app.config.get</c>; the <c>path</c> argument is optional and passed through as-is.</summary>
    private async Task<NodeInvokeResponse> HandleConfigGet(NodeInvokeRequest request)
    {
        var path = GetStringArg(request.Args, "path");
        if (ConfigGetHandler is not { } handler)
        {
            return Error("Config handler not registered");
        }

        return Success(await handler(path));
    }

    /// <summary>Handles <c>app.settings.get</c>; requires a non-empty <c>name</c> argument.</summary>
    private NodeInvokeResponse HandleSettingsGet(NodeInvokeRequest request)
    {
        var name = GetStringArg(request.Args, "name");
        if (string.IsNullOrEmpty(name))
        {
            return Error("Missing required arg: name");
        }

        if (SettingsGetHandler is not { } handler)
        {
            return Error("Settings handler not registered");
        }

        return Success(handler(name));
    }

    /// <summary>
    /// Handles <c>app.settings.set</c>; requires a non-empty <c>name</c> and a
    /// non-null <c>value</c> (an empty string value is accepted).
    /// </summary>
    private NodeInvokeResponse HandleSettingsSet(NodeInvokeRequest request)
    {
        var name = GetStringArg(request.Args, "name");
        var value = GetStringArg(request.Args, "value");
        if (string.IsNullOrEmpty(name))
        {
            return Error("Missing required arg: name");
        }

        if (value == null)
        {
            return Error("Missing required arg: value");
        }

        if (SettingsSetHandler is not { } handler)
        {
            return Error("Settings handler not registered");
        }

        return Success(handler(name, value));
    }

    /// <summary>Handles <c>app.menu</c>; takes no arguments.</summary>
    private NodeInvokeResponse HandleMenu()
    {
        if (MenuHandler is not { } handler)
        {
            return Error("Menu handler not registered");
        }

        return Success(handler());
    }

    /// <summary>Handles <c>app.search</c>; requires a non-empty <c>query</c> argument.</summary>
    private NodeInvokeResponse HandleSearch(NodeInvokeRequest request)
    {
        var query = GetStringArg(request.Args, "query");
        if (string.IsNullOrEmpty(query))
        {
            return Error("Missing required arg: query");
        }

        if (SearchHandler is not { } handler)
        {
            return Error("Search handler not registered");
        }

        return Success(handler(query));
    }
}
|
||||
@ -1,295 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// Node capability that forwards <c>browser.proxy</c> requests to the browser
/// control host on <c>127.0.0.1</c> at the gateway port plus two, and returns
/// the host's JSON result together with any local files that result references.
/// </summary>
public class BrowserProxyCapability : NodeCapabilityBase
{
    private const int DefaultTimeoutMs = 20_000;
    private const int MaxTimeoutMs = 120_000;
    private const long MaxFileBytes = 10 * 1024 * 1024;
    private static readonly string[] s_commands = ["browser.proxy"];

    private readonly string _gatewayUrl;
    private readonly string _bearerToken;
    private readonly int? _sshRemoteGatewayPort;
    private readonly HttpClient _httpClient;

    /// <summary>Creates the capability.</summary>
    /// <param name="logger">Logger passed to the base capability.</param>
    /// <param name="gatewayUrl">Gateway URL; its port determines the control port (port + 2).</param>
    /// <param name="bearerToken">Token used for control-host authentication; null is treated as empty (no auth headers).</param>
    /// <param name="handler">Optional message handler for the internal <see cref="HttpClient"/>.</param>
    /// <param name="sshRemoteGatewayPort">Optional remote gateway port, used only to tailor the SSH forwarding hint in error messages.</param>
    public BrowserProxyCapability(
        IOpenClawLogger logger,
        string gatewayUrl,
        string? bearerToken,
        HttpMessageHandler? handler = null,
        int? sshRemoteGatewayPort = null) : base(logger)
    {
        _gatewayUrl = gatewayUrl;
        _bearerToken = bearerToken ?? "";
        _sshRemoteGatewayPort = sshRemoteGatewayPort;
        _httpClient = handler == null ? new HttpClient() : new HttpClient(handler);

        // Each request is bounded by its own CancellationTokenSource (1..MaxTimeoutMs).
        // HttpClient's default 100 s timeout would otherwise cut off requests that
        // legitimately ask for up to MaxTimeoutMs (120 s).
        _httpClient.Timeout = System.Threading.Timeout.InfiniteTimeSpan;
    }

    public override string Category => "browser";

    public override IReadOnlyList<string> Commands => s_commands;

    /// <summary>
    /// Executes a <c>browser.proxy</c> request against the local browser control host.
    /// </summary>
    /// <param name="request">
    /// Args read: <c>method</c> (GET/POST/DELETE, anything else falls back to GET),
    /// <c>path</c> (required), <c>timeoutMs</c>, <c>query</c>, <c>profile</c>, <c>body</c>.
    /// </param>
    /// <returns>
    /// A success response carrying <c>result</c> (and <c>files</c> when any were collected),
    /// or an error response describing the validation, transport, auth, JSON or file failure.
    /// </returns>
    public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
    {
        if (!string.Equals(request.Command, "browser.proxy", StringComparison.OrdinalIgnoreCase))
        {
            return Error($"Unknown command: {request.Command}");
        }

        if (!TryResolveControlEndpoint(_gatewayUrl, out var controlPort, out var endpointError))
        {
            return Error(endpointError);
        }

        var method = GetStringArg(request.Args, "method", "GET")?.ToUpperInvariant() ?? "GET";
        if (method is not ("GET" or "POST" or "DELETE"))
        {
            method = "GET";
        }

        var rawPath = GetStringArg(request.Args, "path", "");
        if (!TryNormalizePath(rawPath, out var path, out var pathError))
        {
            return Error(pathError);
        }

        var timeoutMs = Math.Clamp(GetIntArg(request.Args, "timeoutMs", DefaultTimeoutMs), 1, MaxTimeoutMs);
        using var timeoutCts = new System.Threading.CancellationTokenSource(TimeSpan.FromMilliseconds(timeoutMs));

        var uri = BuildUri(controlPort, path, request.Args);
        try
        {
            using var httpRequest = CreateHttpRequest(method, uri, request.Args, usePasswordAuth: false);
            using var response = await _httpClient.SendAsync(httpRequest, timeoutCts.Token);
            var responseText = await response.Content.ReadAsStringAsync(timeoutCts.Token);

            // A 401 with a configured token gets one retry that presents the token
            // as a password instead of a bearer token.
            if (response.StatusCode == HttpStatusCode.Unauthorized &&
                !string.IsNullOrWhiteSpace(_bearerToken))
            {
                using var passwordRequest = CreateHttpRequest(method, uri, request.Args, usePasswordAuth: true);
                using var passwordResponse = await _httpClient.SendAsync(passwordRequest, timeoutCts.Token);
                var passwordResponseText = await passwordResponse.Content.ReadAsStringAsync(timeoutCts.Token);
                return BuildProxyResponse(passwordResponse, passwordResponseText);
            }

            return BuildProxyResponse(response, responseText);
        }
        catch (OperationCanceledException)
        {
            // Covers TaskCanceledException (its subclass) as well as a plain
            // OperationCanceledException raised by the timeout token.
            return Error($"browser proxy timed out for {method} {path} after {timeoutMs}ms. {BuildReachabilityGuidance(controlPort, _sshRemoteGatewayPort)}");
        }
        catch (HttpRequestException ex)
        {
            return Error($"Browser control host is not reachable on 127.0.0.1:{controlPort}: {ex.Message}. {BuildReachabilityGuidance(controlPort, _sshRemoteGatewayPort)}");
        }
        catch (JsonException ex)
        {
            return Error($"Browser control host returned invalid JSON: {ex.Message}");
        }
        catch (IOException ex)
        {
            return Error($"Browser proxy file read failed: {ex.Message}");
        }
        catch (UnauthorizedAccessException ex)
        {
            return Error($"Browser proxy file read denied: {ex.Message}");
        }
    }

    /// <summary>
    /// Builds the outgoing request: adds auth headers when a token is configured and,
    /// for POST/DELETE, sends the raw JSON of the <c>body</c> argument as the content.
    /// </summary>
    /// <param name="usePasswordAuth">
    /// When true the token is sent as <c>x-openclaw-password</c> and as the password of a
    /// Basic credential with an empty user name; otherwise it is sent as a Bearer token.
    /// </param>
    private HttpRequestMessage CreateHttpRequest(string method, Uri uri, JsonElement args, bool usePasswordAuth)
    {
        var httpRequest = new HttpRequestMessage(new HttpMethod(method), uri);
        if (!string.IsNullOrWhiteSpace(_bearerToken))
        {
            if (usePasswordAuth)
            {
                httpRequest.Headers.TryAddWithoutValidation("x-openclaw-password", _bearerToken);
                httpRequest.Headers.Authorization = new AuthenticationHeaderValue(
                    "Basic",
                    Convert.ToBase64String(Encoding.UTF8.GetBytes($":{_bearerToken}")));
            }
            else
            {
                httpRequest.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _bearerToken);
            }
        }

        if (method is ("POST" or "DELETE") &&
            args.ValueKind == JsonValueKind.Object &&
            args.TryGetProperty("body", out var body))
        {
            httpRequest.Content = new StringContent(body.GetRawText(), Encoding.UTF8, "application/json");
        }

        return httpRequest;
    }

    /// <summary>
    /// Converts the control host's HTTP response into a node response.
    /// 401 maps to auth guidance, other non-success codes to the response text (or a
    /// generic HTTP status message), and success to the parsed JSON plus collected files.
    /// </summary>
    /// <exception cref="JsonException">The success body is not valid JSON.</exception>
    /// <exception cref="IOException">A referenced file is larger than <see cref="MaxFileBytes"/> or cannot be read.</exception>
    private NodeInvokeResponse BuildProxyResponse(HttpResponseMessage response, string responseText)
    {
        if (response.StatusCode == HttpStatusCode.Unauthorized)
        {
            return Error(BuildAuthenticationFailureGuidance());
        }

        if (!response.IsSuccessStatusCode)
        {
            return Error(string.IsNullOrWhiteSpace(responseText) ? $"Browser control host returned HTTP {(int)response.StatusCode}" : responseText);
        }

        // An empty body is treated as an empty JSON object.
        using var doc = string.IsNullOrWhiteSpace(responseText)
            ? JsonDocument.Parse("{}")
            : JsonDocument.Parse(responseText);

        // Clone so the element outlives the disposed document.
        var result = doc.RootElement.Clone();
        var files = TryCollectFiles(result);

        return files.Count == 0
            ? Success(new { result })
            : Success(new { result, files });
    }

    /// <summary>Returns the 401 guidance text, which differs depending on whether a token is configured.</summary>
    private string BuildAuthenticationFailureGuidance()
    {
        return string.IsNullOrWhiteSpace(_bearerToken)
            ? "Browser control host rejected the unauthenticated request. Windows has no gateway shared token saved for browser-control auth; enter the matching gateway token in Settings or run the browser-control host with compatible auth."
            : "Browser control host rejected authentication. Verify the gateway token saved in Settings matches the browser-control host auth token or password.";
    }

    /// <summary>
    /// Derives the control port (gateway port + 2) from <paramref name="gatewayUrl"/>.
    /// </summary>
    /// <returns>False, with <paramref name="error"/> set, when the URL is not absolute,
    /// has no usable port, or the derived port exceeds 65535.</returns>
    private static bool TryResolveControlEndpoint(string gatewayUrl, out int controlPort, out string error)
    {
        controlPort = 0;
        error = "";
        if (!Uri.TryCreate(gatewayUrl, UriKind.Absolute, out var gatewayUri) || gatewayUri.Port <= 0)
        {
            error = "Browser proxy requires a gateway URL with an explicit local port.";
            return false;
        }

        controlPort = gatewayUri.Port + 2;
        if (controlPort > 65535)
        {
            error = "Browser proxy control port is outside the valid TCP port range.";
            return false;
        }

        return true;
    }

    /// <summary>
    /// Builds the troubleshooting hint appended to timeout/unreachable errors, including
    /// an SSH port-forward command (with a concrete remote port when one is known and valid).
    /// </summary>
    private static string BuildReachabilityGuidance(int localControlPort, int? sshRemoteGatewayPort)
    {
        var sshForward = sshRemoteGatewayPort is >= 1 and <= 65533
            ? $"ssh -N -L {localControlPort}:127.0.0.1:{sshRemoteGatewayPort.Value + 2} <user>@<host>"
            : $"ssh -N -L {localControlPort}:127.0.0.1:<remote-gateway-port+2> <user>@<host>";

        return $"Start the local OpenClaw browser control host on gateway port + 2 ({localControlPort}). If the gateway is reached through SSH, also forward the browser-control port with: {sshForward}";
    }

    /// <summary>
    /// Trims <paramref name="rawPath"/> and ensures it starts with a slash.
    /// Rejects empty paths and anything that looks like a URL (contains "://" or starts with "//").
    /// </summary>
    private static bool TryNormalizePath(string? rawPath, out string path, out string error)
    {
        path = "";
        error = "";
        var candidate = rawPath?.Trim() ?? "";
        if (candidate.Length == 0)
        {
            error = "INVALID_REQUEST: path required";
            return false;
        }

        if (candidate.Contains("://", StringComparison.Ordinal) || candidate.StartsWith("//", StringComparison.Ordinal))
        {
            error = "INVALID_REQUEST: browser.proxy path must be a local control path, not a URL";
            return false;
        }

        path = candidate.StartsWith("/", StringComparison.Ordinal) ? candidate : "/" + candidate;
        return true;
    }

    /// <summary>
    /// Builds the <c>http://127.0.0.1:{controlPort}{path}</c> URI, appending the
    /// properties of the <c>query</c> object and the <c>profile</c> string as query parameters.
    /// </summary>
    private static Uri BuildUri(int controlPort, string path, JsonElement args)
    {
        var builder = new UriBuilder("http", "127.0.0.1", controlPort, path);
        if (args.ValueKind != JsonValueKind.Object)
        {
            return builder.Uri;
        }

        var query = new List<string>();
        if (args.TryGetProperty("query", out var queryElement) && queryElement.ValueKind == JsonValueKind.Object)
        {
            foreach (var prop in queryElement.EnumerateObject())
            {
                if (prop.Value.ValueKind is JsonValueKind.Null or JsonValueKind.Undefined)
                {
                    continue;
                }

                // Non-string values are sent as their JSON text. JsonElement.ToString()
                // would render booleans as "True"/"False" instead of "true"/"false".
                var value = prop.Value.ValueKind == JsonValueKind.String
                    ? prop.Value.GetString()
                    : prop.Value.GetRawText();
                if (value != null)
                {
                    query.Add($"{Uri.EscapeDataString(prop.Name)}={Uri.EscapeDataString(value)}");
                }
            }
        }

        if (args.TryGetProperty("profile", out var profileElement) &&
            profileElement.ValueKind == JsonValueKind.String &&
            !string.IsNullOrWhiteSpace(profileElement.GetString()))
        {
            query.Add($"profile={Uri.EscapeDataString(profileElement.GetString()!)}");
        }

        builder.Query = string.Join("&", query);
        return builder.Uri;
    }

    /// <summary>
    /// Reads the local files named by the result's <c>path</c>, <c>imagePath</c> and
    /// <c>download.path</c> string properties and returns them base64-encoded.
    /// Paths that do not exist or are directories are skipped.
    /// </summary>
    /// <exception cref="IOException">A file is larger than <see cref="MaxFileBytes"/>.</exception>
    private static List<object> TryCollectFiles(JsonElement result)
    {
        // Case-insensitive set so the same file is not attached twice.
        var paths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
        CollectPath(result, "path", paths);
        CollectPath(result, "imagePath", paths);
        if (result.ValueKind == JsonValueKind.Object &&
            result.TryGetProperty("download", out var download) &&
            download.ValueKind == JsonValueKind.Object)
        {
            CollectPath(download, "path", paths);
        }

        var files = new List<object>();
        foreach (var path in paths)
        {
            var info = new FileInfo(path);
            if (!info.Exists || (info.Attributes & FileAttributes.Directory) == FileAttributes.Directory)
            {
                continue;
            }

            if (info.Length > MaxFileBytes)
            {
                throw new IOException($"browser proxy file exceeds {MaxFileBytes / (1024 * 1024)}MB: {path}");
            }

            var bytes = File.ReadAllBytes(path);
            files.Add(new
            {
                path,
                base64 = Convert.ToBase64String(bytes),
                mimeType = GuessMimeType(path)
            });
        }

        return files;
    }

    /// <summary>Adds the non-blank string value of <paramref name="propertyName"/>, if present, to <paramref name="paths"/>.</summary>
    private static void CollectPath(JsonElement source, string propertyName, HashSet<string> paths)
    {
        if (source.ValueKind != JsonValueKind.Object ||
            !source.TryGetProperty(propertyName, out var value) ||
            value.ValueKind != JsonValueKind.String)
        {
            return;
        }

        var path = value.GetString();
        if (!string.IsNullOrWhiteSpace(path))
        {
            paths.Add(path);
        }
    }

    /// <summary>Maps a file extension to a MIME type; returns null for unrecognized extensions.</summary>
    private static string? GuessMimeType(string path)
    {
        return Path.GetExtension(path).ToLowerInvariant() switch
        {
            ".png" => "image/png",
            ".jpg" or ".jpeg" => "image/jpeg",
            ".pdf" => "application/pdf",
            ".txt" => "text/plain",
            ".json" => "application/json",
            ".html" or ".htm" => "text/html",
            _ => null
        };
    }
}
|
||||
@ -28,9 +28,6 @@ public class CameraCapability : NodeCapabilityBase
|
||||
public CameraCapability(IOpenClawLogger logger) : base(logger)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
/// Limits <paramref name="value"/> to the range bounded by <paramref name="min"/>
/// and <paramref name="max"/>; the lower bound is checked first.
/// </summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
@ -60,23 +57,16 @@ public class CameraCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Camera list failed", ex);
|
||||
return Error("List failed");
|
||||
return Error($"List failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
// Boundary clamps — reject extreme/negative caller values up-front.
|
||||
private const int MinCameraDimension = 16;
|
||||
private const int MaxCameraWidth = 4096;
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
private const int MaxClipDurationMs = 60_000;
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleSnapAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var deviceId = GetStringArg(request.Args, "deviceId");
|
||||
var format = GetStringArg(request.Args, "format", "jpeg");
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1280), MinCameraDimension, MaxCameraWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1280);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
|
||||
Logger.Info($"camera.snap: deviceId={deviceId ?? "(default)"}, format={format}");
|
||||
|
||||
@ -106,16 +96,14 @@ public class CameraCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Camera snap failed", ex);
|
||||
return Error("Snap failed");
|
||||
return Error($"Snap failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleClipAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var deviceId = GetStringArg(request.Args, "deviceId");
|
||||
// Floor at 100ms — anything shorter is meaningless and a 0/negative
|
||||
// value previously slipped through the `Math.Min` cap.
|
||||
var durationMs = Clamp(GetIntArg(request.Args, "durationMs", 3000), 100, MaxClipDurationMs);
|
||||
var durationMs = Math.Min(GetIntArg(request.Args, "durationMs", 3000), 60000);
|
||||
var includeAudio = GetBoolArg(request.Args, "includeAudio", true);
|
||||
var format = GetStringArg(request.Args, "format", "mp4") ?? "mp4";
|
||||
|
||||
@ -147,7 +135,7 @@ public class CameraCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Camera clip failed", ex);
|
||||
return Error("Clip failed");
|
||||
return Error($"Clip failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,10 +1,7 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using Microsoft.Win32.SafeHandles;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
@ -24,9 +21,7 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
"canvas.snapshot",
|
||||
"canvas.a2ui.push",
|
||||
"canvas.a2ui.pushJSONL",
|
||||
"canvas.a2ui.reset",
|
||||
"canvas.a2ui.dump",
|
||||
"canvas.caps",
|
||||
"canvas.a2ui.reset"
|
||||
};
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
@ -34,75 +29,15 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
// Events for UI to handle
|
||||
public event EventHandler<CanvasPresentArgs>? PresentRequested;
|
||||
public event EventHandler? HideRequested;
|
||||
/// <summary>
|
||||
/// Subscriber decides how to handle a navigate request and returns the
|
||||
/// opener that actually serviced it: <c>"canvas"</c> if an in-process
|
||||
/// WebView2 frame navigated, <c>"browser"</c> if the URL was handed to the
|
||||
/// OS default browser. Throwing surfaces as an error to the gateway.
|
||||
/// Single-subscriber: same multi-handler hazard as the other Func events.
|
||||
/// </summary>
|
||||
private Func<string, Task<string>>? _navigateRequested;
|
||||
public event Func<string, Task<string>> NavigateRequested
|
||||
{
|
||||
add => SetSingleHandler(ref _navigateRequested, value, nameof(NavigateRequested));
|
||||
remove => ClearSingleHandler(ref _navigateRequested, value);
|
||||
}
|
||||
// Func-based "events" are inherently single-handler — multi-subscribe to a
|
||||
// Delegate.Combine'd Func silently invokes only the last subscriber's
|
||||
// return value, hiding the others. Expose them as single-subscriber events
|
||||
// that throw on a second subscribe so this is loud.
|
||||
private Func<string, Task<string>>? _evalRequested;
|
||||
public event Func<string, Task<string>> EvalRequested
|
||||
{
|
||||
add => SetSingleHandler(ref _evalRequested, value, nameof(EvalRequested));
|
||||
remove => ClearSingleHandler(ref _evalRequested, value);
|
||||
}
|
||||
private Func<CanvasSnapshotArgs, Task<string>>? _snapshotRequested;
|
||||
public event Func<CanvasSnapshotArgs, Task<string>> SnapshotRequested
|
||||
{
|
||||
add => SetSingleHandler(ref _snapshotRequested, value, nameof(SnapshotRequested));
|
||||
remove => ClearSingleHandler(ref _snapshotRequested, value);
|
||||
}
|
||||
public event EventHandler<string>? NavigateRequested;
|
||||
public event Func<string, Task<string>>? EvalRequested;
|
||||
public event Func<CanvasSnapshotArgs, Task<string>>? SnapshotRequested;
|
||||
public event EventHandler<CanvasA2UIArgs>? A2UIPushRequested;
|
||||
public event EventHandler? A2UIResetRequested;
|
||||
/// <summary>Returns a JSON state dump of the native A2UI surface graph.</summary>
|
||||
private Func<Task<string>>? _a2uiDumpRequested;
|
||||
public event Func<Task<string>> A2UIDumpRequested
|
||||
{
|
||||
add => SetSingleHandler(ref _a2uiDumpRequested, value, nameof(A2UIDumpRequested));
|
||||
remove => ClearSingleHandler(ref _a2uiDumpRequested, value);
|
||||
}
|
||||
/// <summary>Returns a JSON capability summary describing which canvas operations are supported.</summary>
|
||||
private Func<Task<string>>? _capsRequested;
|
||||
public event Func<Task<string>> CapsRequested
|
||||
{
|
||||
add => SetSingleHandler(ref _capsRequested, value, nameof(CapsRequested));
|
||||
remove => ClearSingleHandler(ref _capsRequested, value);
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores <paramref name="value"/> as the sole subscriber in <paramref name="slot"/>.
/// </summary>
/// <param name="slot">Backing field of the single-subscriber event.</param>
/// <param name="value">Handler being subscribed.</param>
/// <param name="name">Event name used in the exception message.</param>
/// <exception cref="InvalidOperationException">
/// A different handler is already subscribed.
/// </exception>
private static void SetSingleHandler<T>(ref T? slot, T value, string name) where T : Delegate
{
    // Compare by delegate equality (same target + method), not by reference:
    // every `+= Method` conversion of an instance method allocates a new delegate
    // object, so a reference check would reject re-subscribing the very same handler.
    if (slot != null && !slot.Equals(value))
    {
        throw new InvalidOperationException($"{name} accepts only one subscriber. Detach the previous handler first.");
    }

    slot = value;
}
|
||||
/// <summary>
/// Clears <paramref name="slot"/> when it currently holds <paramref name="value"/>;
/// otherwise leaves it untouched.
/// </summary>
/// <param name="slot">Backing field of the single-subscriber event.</param>
/// <param name="value">Handler being unsubscribed.</param>
private static void ClearSingleHandler<T>(ref T? slot, T value) where T : Delegate
{
    // Delegate equality (same target + method) rather than reference equality:
    // `-= Method` on an instance method creates a fresh delegate object, which a
    // reference comparison would never match, leaving the handler attached.
    if (Equals(slot, value))
    {
        slot = null;
    }
}
|
||||
|
||||
public CanvasCapability(IOpenClawLogger logger) : base(logger)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns <paramref name="min"/> when <paramref name="value"/> is below it,
/// <paramref name="max"/> when above it, and <paramref name="value"/> otherwise.
/// </summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
    {
        return min;
    }

    return value > max ? max : value;
}
|
||||
|
||||
/// <summary>
/// Clamps a window coordinate to [MinPosition, MaxPosition], passing the
/// documented "center" sentinel (-1) through unchanged.
/// </summary>
private static int ClampPosition(int value)
{
    // documented "center" sentinel
    if (value == -1)
    {
        return -1;
    }

    if (value < MinPosition)
    {
        return MinPosition;
    }

    if (value > MaxPosition)
    {
        return MaxPosition;
    }

    return value;
}
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
@ -116,37 +51,18 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
"canvas.a2ui.push" => HandleA2UIPush(request),
|
||||
"canvas.a2ui.pushJSONL" => HandleA2UIPush(request),
|
||||
"canvas.a2ui.reset" => HandleA2UIReset(request),
|
||||
"canvas.a2ui.dump" => await HandleA2UIDumpAsync(),
|
||||
"canvas.caps" => await HandleCapsAsync(),
|
||||
_ => Error($"Unknown command: {request.Command}")
|
||||
};
|
||||
}
|
||||
|
||||
// Window-bounds clamps. -1 is the documented "center" sentinel for x/y, so
// ClampPosition returns it unchanged; every other value is clamped to
// [MinPosition, MaxPosition].
|
||||
private const int MinDimension = 100;
|
||||
private const int MaxDimension = 7680;
|
||||
private const int MinPosition = -16384;
|
||||
private const int MaxPosition = 16384;
|
||||
private const int MinSnapshotWidth = 32;
|
||||
private const int MaxSnapshotWidth = 7680;
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
|
||||
// A2UI push caps. Inline transport in McpHttpServer caps at 4 MiB; jsonlPath
|
||||
// bypasses that, so re-enforce here. The line-count cap protects the UI thread
|
||||
// from a single push that explodes into thousands of dispatcher posts.
|
||||
internal const long MaxA2UIJsonlBytes = 4L * 1024 * 1024;
|
||||
internal const int MaxA2UIJsonlLines = 4096;
|
||||
|
||||
private Task<NodeInvokeResponse> HandlePresentAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var url = GetStringArg(request.Args, "url");
|
||||
var html = GetStringArg(request.Args, "html");
|
||||
var width = Clamp(GetIntArg(request.Args, "width", 800), MinDimension, MaxDimension);
|
||||
var height = Clamp(GetIntArg(request.Args, "height", 600), MinDimension, MaxDimension);
|
||||
var x = ClampPosition(GetIntArg(request.Args, "x", -1)); // -1 = center
|
||||
var y = ClampPosition(GetIntArg(request.Args, "y", -1));
|
||||
var width = GetIntArg(request.Args, "width", 800);
|
||||
var height = GetIntArg(request.Args, "height", 600);
|
||||
var x = GetIntArg(request.Args, "x", -1); // -1 = center
|
||||
var y = GetIntArg(request.Args, "y", -1);
|
||||
var title = GetStringArg(request.Args, "title", "Canvas");
|
||||
var alwaysOnTop = GetBoolArg(request.Args, "alwaysOnTop", false);
|
||||
|
||||
@ -176,49 +92,16 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleNavigateAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var rawUrl = GetStringArg(request.Args, "url");
|
||||
if (string.IsNullOrEmpty(rawUrl))
|
||||
var url = GetStringArg(request.Args, "url");
|
||||
if (string.IsNullOrEmpty(url))
|
||||
{
|
||||
return Error("Missing url parameter");
|
||||
}
|
||||
|
||||
// Validate up front so the OS-level Process.Start in the subscriber
|
||||
// can't be tricked into shell-executing javascript:/file:/app-protocol
|
||||
// URIs. The subscriber re-validates as defense-in-depth.
|
||||
if (!HttpUrlValidator.TryParse(rawUrl, out var canonical, out var validationError))
|
||||
{
|
||||
// Avoid leaking the raw URL — agents sometimes hand us tokenized
|
||||
// OAuth/reset URLs that fail validation, and our log files have
|
||||
// an effectively-unbounded retention policy. Sanitize to scheme +
|
||||
// host + first path segment.
|
||||
Logger.Warn($"canvas.navigate rejected: {validationError} (sanitized: {UrlLogSanitizer.Sanitize(rawUrl)})");
|
||||
return Error($"Invalid url: {validationError}");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.navigate: {UrlLogSanitizer.Sanitize(canonical)}");
|
||||
|
||||
var handler = _navigateRequested;
|
||||
if (handler == null)
|
||||
{
|
||||
// No subscriber means there's no surface to navigate and no opener
|
||||
// to fall back to. Tell the agent honestly so it can pick another
|
||||
// tool instead of believing it succeeded.
|
||||
return Error("CANVAS_NOT_AVAILABLE: no navigate handler registered");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var opener = await handler(canonical!);
|
||||
// opener is the subscriber's word for how it serviced the request:
|
||||
// "canvas" (existing WebView2 frame), "browser" (default browser),
|
||||
// or anything else the subscriber wants to surface back to the agent.
|
||||
return Success(new { navigated = true, opener, url = canonical });
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"canvas.navigate handler failed: {ex.Message}", ex);
|
||||
return Error($"Navigate failed: {ex.Message}");
|
||||
}
|
||||
|
||||
Logger.Info($"canvas.navigate: {url}");
|
||||
NavigateRequested?.Invoke(this, url);
|
||||
|
||||
return Success(new { navigated = true });
|
||||
}
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleEvalAsync(NodeInvokeRequest request)
|
||||
@ -233,15 +116,14 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
|
||||
Logger.Info($"canvas.eval: {script[..Math.Min(50, script.Length)]}...");
|
||||
|
||||
var evalHandler = _evalRequested;
|
||||
if (evalHandler == null)
|
||||
if (EvalRequested == null)
|
||||
{
|
||||
return Error("Canvas not available");
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
var result = await evalHandler(script);
|
||||
var result = await EvalRequested(script);
|
||||
return Success(new { result });
|
||||
}
|
||||
catch (Exception ex)
|
||||
@ -253,20 +135,19 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
private async Task<NodeInvokeResponse> HandleSnapshotAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var format = GetStringArg(request.Args, "format", "png");
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1200), MinSnapshotWidth, MaxSnapshotWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1200);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
|
||||
Logger.Info($"canvas.snapshot: format={format}, maxWidth={maxWidth}");
|
||||
|
||||
var snapshotHandler = _snapshotRequested;
|
||||
if (snapshotHandler == null)
|
||||
if (SnapshotRequested == null)
|
||||
{
|
||||
return Error("Canvas not available");
|
||||
}
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
var base64 = await snapshotHandler(new CanvasSnapshotArgs
|
||||
var base64 = await SnapshotRequested(new CanvasSnapshotArgs
|
||||
{
|
||||
Format = format ?? "png",
|
||||
MaxWidth = maxWidth,
|
||||
@ -286,193 +167,53 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
var jsonl = GetStringArg(request.Args, "jsonl");
|
||||
var jsonlPath = GetStringArg(request.Args, "jsonlPath");
|
||||
var props = request.Args.TryGetProperty("props", out var propsEl) ? propsEl : default;
|
||||
|
||||
|
||||
if (string.IsNullOrWhiteSpace(jsonl) && !string.IsNullOrWhiteSpace(jsonlPath))
|
||||
{
|
||||
// Validate jsonlPath to prevent arbitrary file reads.
|
||||
// Resolve to absolute path and reject traversal or suspicious paths.
|
||||
try
|
||||
{
|
||||
jsonl = ReadValidatedJsonlPath(jsonlPath, request.Command);
|
||||
var fullPath = Path.GetFullPath(jsonlPath);
|
||||
var tempRoot = Path.GetFullPath(Path.GetTempPath());
|
||||
if (!fullPath.StartsWith(tempRoot, StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
Logger.Warn($"{request.Command}: jsonlPath outside temp directory: {fullPath}");
|
||||
return Error("jsonlPath must be within the system temp directory");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"{request.Command}: failed to read jsonlPath", ex);
|
||||
return Error($"Invalid jsonlPath: {ex.Message}");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
jsonl = File.ReadAllText(jsonlPath);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error($"{request.Command}: failed to read jsonlPath ({jsonlPath})", ex);
|
||||
return Error($"Failed to read jsonlPath: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (string.IsNullOrWhiteSpace(jsonl))
|
||||
{
|
||||
return Error("Missing jsonl or jsonlPath parameter");
|
||||
}
|
||||
|
||||
// Inline-jsonl size cap. Encoding.UTF8.GetByteCount streams over chars
|
||||
// without allocating, so this is cheap.
|
||||
long byteCount = System.Text.Encoding.UTF8.GetByteCount(jsonl);
|
||||
if (byteCount > MaxA2UIJsonlBytes)
|
||||
{
|
||||
Logger.Warn($"{request.Command}: jsonl payload too large ({byteCount} > {MaxA2UIJsonlBytes})");
|
||||
return Error($"jsonl exceeds maximum size of {MaxA2UIJsonlBytes} bytes");
|
||||
}
|
||||
|
||||
// Line-count cap. A push that fans out to thousands of UI-thread
|
||||
// dispatches has DoS potential even if individually small.
|
||||
int lineCount = CountLines(jsonl);
|
||||
if (lineCount > MaxA2UIJsonlLines)
|
||||
{
|
||||
Logger.Warn($"{request.Command}: jsonl line count too high ({lineCount} > {MaxA2UIJsonlLines})");
|
||||
return Error($"jsonl exceeds maximum of {MaxA2UIJsonlLines} lines");
|
||||
}
|
||||
|
||||
Logger.Info($"{request.Command}: {byteCount} bytes, {lineCount} lines");
|
||||
|
||||
|
||||
Logger.Info($"{request.Command}: {jsonl.Length} chars");
|
||||
|
||||
A2UIPushRequested?.Invoke(this, new CanvasA2UIArgs
|
||||
{
|
||||
Jsonl = jsonl,
|
||||
JsonlPath = jsonlPath,
|
||||
Props = props.ValueKind != default ? props.GetRawText() : "{}"
|
||||
});
|
||||
|
||||
|
||||
return Success(new { pushed = true });
|
||||
}
|
||||
|
||||
/// <summary>
/// Counts the lines of <paramref name="s"/> that contain at least one
/// non-whitespace character. Both '\n' and '\r' end a line, so a "\r\n"
/// pair never produces an extra empty line in the count.
/// </summary>
/// <param name="s">Text to scan; scanned once without allocating.</param>
/// <returns>Number of non-blank lines.</returns>
private static int CountLines(string s)
{
    int total = 0;
    bool hasContent = false;

    foreach (char ch in s)
    {
        if (ch is '\n' or '\r')
        {
            // A line break closes the current line; it only counts if it held visible content.
            if (hasContent)
            {
                total++;
            }

            hasContent = false;
        }
        else if (!hasContent && !char.IsWhiteSpace(ch))
        {
            hasContent = true;
        }
    }

    // A final line without a trailing newline still counts.
    return hasContent ? total + 1 : total;
}
|
||||
|
||||
/// <summary>
/// Reads the file named by <paramref name="jsonlPath"/> after checking that it
/// lies under the system temp directory: first by its absolute path, then (for
/// reparse points) by its resolved link target, and finally by the path reported
/// for the opened handle. Files larger than MaxA2UIJsonlBytes are rejected.
/// </summary>
/// <param name="jsonlPath">Caller-supplied path; made absolute with Path.GetFullPath.</param>
/// <param name="command">Command name used as the prefix of warning log lines.</param>
/// <returns>The file contents, decoded as UTF-8 with byte-order-mark detection.</returns>
/// <exception cref="InvalidOperationException">
/// The path cannot be normalized, is (or resolves) outside the temp directory,
/// contains an unresolvable reparse point, or the file exceeds the size cap.
/// </exception>
private string ReadValidatedJsonlPath(string jsonlPath, string command)
{
    string absolutePath;
    string tempRoot;
    try
    {
        absolutePath = Path.GetFullPath(jsonlPath);
        tempRoot = EnsureTrailingSeparator(Path.GetFullPath(Path.GetTempPath()));
    }
    catch (Exception ex)
    {
        throw new InvalidOperationException($"Invalid jsonlPath: {ex.Message}", ex);
    }

    // Check 1: the lexical (normalized) path must sit under the temp root.
    if (!IsPathWithinRoot(absolutePath, tempRoot))
    {
        Logger.Warn($"{command}: jsonlPath outside temp directory: {absolutePath}");
        throw new InvalidOperationException("jsonlPath must be within the system temp directory");
    }

    // Check 2: if the file itself is a reparse point, its final target must also sit under the temp root.
    var fileInfo = new FileInfo(absolutePath);
    bool isReparsePoint = fileInfo.Exists
        && (fileInfo.Attributes & FileAttributes.ReparsePoint) == FileAttributes.ReparsePoint;
    if (isReparsePoint)
    {
        FileSystemInfo? linkTarget;
        try
        {
            linkTarget = fileInfo.ResolveLinkTarget(returnFinalTarget: true);
        }
        catch (Exception ex)
        {
            Logger.Warn($"{command}: jsonlPath reparse point could not be resolved: {ex.Message}");
            throw new InvalidOperationException("jsonlPath contains an unresolvable reparse point", ex);
        }

        if (linkTarget is null)
        {
            Logger.Warn($"{command}: jsonlPath reparse point could not be resolved");
            throw new InvalidOperationException("jsonlPath contains an unresolvable reparse point");
        }

        if (!IsPathWithinRoot(linkTarget.FullName, tempRoot))
        {
            Logger.Warn($"{command}: jsonlPath reparse point resolves outside temp directory: {linkTarget.FullName}");
            throw new InvalidOperationException("jsonlPath reparse point must resolve within the system temp directory");
        }
    }

    using var fileStream = new FileStream(absolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);

    // Check 3: GetFinalPathFromHandle yields "" off Windows; in that case there is
    // nothing to compare and the link-target check above is the only guard.
    var handlePath = GetFinalPathFromHandle(fileStream.SafeFileHandle);
    bool handleEscapesTemp = !string.IsNullOrEmpty(handlePath) && !IsPathWithinRoot(handlePath, tempRoot);
    if (handleEscapesTemp)
    {
        Logger.Warn($"{command}: jsonlPath file handle resolves outside temp directory: {handlePath}");
        throw new InvalidOperationException("jsonlPath must resolve within the system temp directory");
    }

    // Enforce the size cap before any content is read.
    if (fileStream.Length > MaxA2UIJsonlBytes)
    {
        Logger.Warn($"{command}: jsonlPath file too large ({fileStream.Length} > {MaxA2UIJsonlBytes})");
        throw new InvalidOperationException($"jsonlPath exceeds maximum size of {MaxA2UIJsonlBytes} bytes");
    }

    using var textReader = new StreamReader(fileStream, Encoding.UTF8, detectEncodingFromByteOrderMarks: true);
    return textReader.ReadToEnd();
}
|
||||
|
||||
/// <summary>
/// Returns true when <paramref name="path"/>, after stripping any extended-length
/// prefix and normalizing with Path.GetFullPath, begins with the normalized
/// <paramref name="root"/> followed by a directory separator. The comparison is
/// ordinal and case-insensitive.
/// </summary>
private static bool IsPathWithinRoot(string path, string root)
{
    string candidate = NormalizeFinalPath(path);
    candidate = Path.GetFullPath(candidate);

    string rootPrefix = NormalizeFinalPath(root);
    rootPrefix = Path.GetFullPath(rootPrefix);
    // The trailing separator stops a sibling such as "<root>-evil" from matching as a prefix.
    rootPrefix = EnsureTrailingSeparator(rootPrefix);

    return candidate.StartsWith(rootPrefix, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Returns <paramref name="path"/> unchanged when it already ends with the primary
/// or alternate directory separator; otherwise appends the primary separator.
/// </summary>
private static string EnsureTrailingSeparator(string path)
{
    bool alreadyTerminated = path.Length > 0
        && (path[^1] == Path.DirectorySeparatorChar || path[^1] == Path.AltDirectorySeparatorChar);

    return alreadyTerminated ? path : path + Path.DirectorySeparatorChar;
}
|
||||
|
||||
/// <summary>
/// Asks Windows for the final path of an open file handle, growing the buffer
/// until the result fits. Returns an empty string on non-Windows platforms.
/// </summary>
/// <param name="handle">Handle of the already-opened file.</param>
/// <returns>The path with any extended-length prefix removed, or "" off Windows.</returns>
/// <exception cref="IOException">
/// The native call reported failure, or the required buffer would exceed 32768 characters.
/// </exception>
private static string GetFinalPathFromHandle(SafeFileHandle handle)
{
    if (!OperatingSystem.IsWindows())
    {
        return string.Empty;
    }

    for (int bufferChars = 512; bufferChars <= 32768;)
    {
        var buffer = new StringBuilder(bufferChars);
        uint reported = GetFinalPathNameByHandle(handle, buffer, (uint)buffer.Capacity, 0);

        if (reported == 0)
        {
            throw new IOException($"GetFinalPathNameByHandle failed with Win32 error {Marshal.GetLastWin32Error()}");
        }

        if (reported < buffer.Capacity)
        {
            return NormalizeFinalPath(buffer.ToString());
        }

        // Buffer was too small: retry with room for the reported length plus one.
        bufferChars = (int)reported + 1;
    }

    throw new IOException("GetFinalPathNameByHandle returned an unexpectedly long path");
}
|
||||
|
||||
/// <summary>
/// Strips the Windows extended-length prefix from a path:
/// <c>\\?\UNC\server\share</c> becomes <c>\\server\share</c> and
/// <c>\\?\C:\dir</c> becomes <c>C:\dir</c>. Any other path is returned unchanged.
/// </summary>
private static string NormalizeFinalPath(string path)
{
    const string uncDevicePrefix = @"\\?\UNC\";
    const string devicePrefix = @"\\?\";

    // The UNC form must be tested first because it also starts with the plain device prefix.
    if (path.StartsWith(uncDevicePrefix, StringComparison.OrdinalIgnoreCase))
    {
        return @"\\" + path[uncDevicePrefix.Length..];
    }

    return path.StartsWith(devicePrefix, StringComparison.OrdinalIgnoreCase)
        ? path[devicePrefix.Length..]
        : path;
}
|
||||
|
||||
/// <summary>
/// P/Invoke declaration for kernel32's GetFinalPathNameByHandle, marshalled as
/// Unicode with the last Win32 error preserved (SetLastError = true).
/// </summary>
/// <remarks>
/// GetFinalPathFromHandle in this file treats a return value of 0 as failure and a
/// value that is not less than the buffer capacity as "buffer too small, retry".
/// </remarks>
/// <param name="hFile">Handle of the open file to query.</param>
/// <param name="lpszFilePath">Buffer that receives the path.</param>
/// <param name="cchFilePath">Capacity of <paramref name="lpszFilePath"/>, in characters.</param>
/// <param name="dwFlags">Flags for the native call; the caller in this file passes 0.</param>
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
|
||||
private static extern uint GetFinalPathNameByHandle(
|
||||
SafeFileHandle hFile,
|
||||
StringBuilder lpszFilePath,
|
||||
uint cchFilePath,
|
||||
uint dwFlags);
|
||||
|
||||
private NodeInvokeResponse HandleA2UIReset(NodeInvokeRequest request)
|
||||
{
|
||||
@ -480,52 +221,6 @@ public class CanvasCapability : NodeCapabilityBase
|
||||
A2UIResetRequested?.Invoke(this, EventArgs.Empty);
|
||||
return Success(new { reset = true });
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles canvas.a2ui.dump: awaits the registered dump handler and returns its
/// JSON as a structured payload. Reports CANVAS_NOT_OPEN when no handler is set
/// and CANVAS_DUMP_FAILED when the handler throws or its output is not valid JSON.
/// </summary>
private async Task<NodeInvokeResponse> HandleA2UIDumpAsync()
{
    Logger.Info("canvas.a2ui.dump");

    // Copy to a local so the null check and the invocation see the same value.
    var handler = _a2uiDumpRequested;
    if (handler is null)
    {
        return Error("CANVAS_NOT_OPEN: no A2UI canvas is currently active");
    }

    try
    {
        var rawJson = await handler();

        // Hand back a JSON-typed object rather than a string so MCP clients
        // receive structured data instead of a quoted blob.
        using (var parsed = System.Text.Json.JsonDocument.Parse(rawJson))
        {
            var rootText = parsed.RootElement.GetRawText();
            return Success(System.Text.Json.JsonSerializer.Deserialize<object>(rootText));
        }
    }
    catch (Exception failure)
    {
        return Error($"CANVAS_DUMP_FAILED: {failure.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Handles the canvas capabilities query. Without a registered handler it reports
/// a fixed "no renderer" capability set; otherwise it awaits the handler and
/// returns its JSON as a structured payload, or CANVAS_CAPS_FAILED if the handler
/// throws or its output is not valid JSON.
/// </summary>
private async Task<NodeInvokeResponse> HandleCapsAsync()
{
    // Copy to a local so the null check and the invocation see the same value.
    var handler = _capsRequested;

    if (handler is not null)
    {
        try
        {
            var rawJson = await handler();
            using (var parsed = System.Text.Json.JsonDocument.Parse(rawJson))
            {
                var rootText = parsed.RootElement.GetRawText();
                return Success(System.Text.Json.JsonSerializer.Deserialize<object>(rootText));
            }
        }
        catch (Exception failure)
        {
            return Error($"CANVAS_CAPS_FAILED: {failure.Message}");
        }
    }

    // No canvas attached: advertise that nothing is available.
    return Success(new
    {
        renderer = "none",
        eval = false,
        snapshot = false,
        navigate = false,
        a2ui = new { version = "0.8", introspect = false },
    });
}
|
||||
}
|
||||
|
||||
public class CanvasPresentArgs : EventArgs
|
||||
|
||||
@ -11,9 +11,7 @@ using System.Threading.Tasks;
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Device metadata and system health/status capability.
|
||||
/// device.info - static device metadata (no provider needed).
|
||||
/// device.status - rich system health data via injected IDeviceStatusProvider.
|
||||
/// Device metadata and lightweight health/status capability.
|
||||
/// </summary>
|
||||
public class DeviceCapability : NodeCapabilityBase
|
||||
{
|
||||
@ -25,28 +23,20 @@ public class DeviceCapability : NodeCapabilityBase
|
||||
"device.status"
|
||||
];
|
||||
|
||||
private static readonly HashSet<string> _validSections = new(
|
||||
["os", "cpu", "memory", "disk", "battery"],
|
||||
StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
private readonly IDeviceStatusProvider? _provider;
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
public DeviceCapability(IOpenClawLogger logger, IDeviceStatusProvider provider)
|
||||
: base(logger)
|
||||
public DeviceCapability(IOpenClawLogger logger) : base(logger)
|
||||
{
|
||||
_provider = provider;
|
||||
}
|
||||
|
||||
public override async Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
public override Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
|
||||
{
|
||||
return request.Command switch
|
||||
return Task.FromResult(request.Command switch
|
||||
{
|
||||
"device.info" => HandleInfo(),
|
||||
"device.status" => await HandleStatusAsync(request),
|
||||
"device.status" => HandleStatus(),
|
||||
_ => Error($"Unknown command: {request.Command}")
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
private NodeInvokeResponse HandleInfo()
|
||||
@ -70,133 +60,29 @@ public class DeviceCapability : NodeCapabilityBase
|
||||
});
|
||||
}
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleStatusAsync(NodeInvokeRequest request)
|
||||
private NodeInvokeResponse HandleStatus()
|
||||
{
|
||||
if (_provider == null)
|
||||
return Error("Device status provider not available");
|
||||
Logger.Info("device.status");
|
||||
|
||||
var sections = GetStringArrayArg(request.Args, "sections");
|
||||
var storage = GetStorageStatus(Logger);
|
||||
var network = GetNetworkStatus(Logger);
|
||||
|
||||
// Reject unknown section names
|
||||
var invalid = sections.Where(s => !_validSections.Contains(s)).ToArray();
|
||||
if (invalid.Length > 0)
|
||||
return Success(new
|
||||
{
|
||||
return Error($"Unknown sections: {string.Join(", ", invalid)}. "
|
||||
+ $"Valid: {string.Join(", ", _validSections)}");
|
||||
}
|
||||
|
||||
bool all = sections.Length == 0;
|
||||
var result = new Dictionary<string, object?>
|
||||
{
|
||||
["collectedAt"] = DateTime.UtcNow.ToString("o")
|
||||
};
|
||||
|
||||
if (all || sections.Contains("os", StringComparer.OrdinalIgnoreCase))
|
||||
result["os"] = SafeCollect("os", () => _provider.GetOsInfo());
|
||||
|
||||
if (all || sections.Contains("cpu", StringComparer.OrdinalIgnoreCase))
|
||||
result["cpu"] = await SafeCollectAsync("cpu", () => _provider.GetCpuInfoAsync());
|
||||
|
||||
if (all || sections.Contains("memory", StringComparer.OrdinalIgnoreCase))
|
||||
result["memory"] = SafeCollect("memory", () => _provider.GetMemoryInfo());
|
||||
|
||||
if (all || sections.Contains("disk", StringComparer.OrdinalIgnoreCase))
|
||||
result["disk"] = SafeCollect("disk", () => _provider.GetDiskInfo());
|
||||
|
||||
if (all || sections.Contains("battery", StringComparer.OrdinalIgnoreCase))
|
||||
result["battery"] = SafeCollect("battery", () => WrapBatteryWithLegacyFields(_provider.GetBatteryInfo()));
|
||||
|
||||
// Always ensure legacy battery fields exist for backward compatibility.
|
||||
// Old contract: { level: null, state: "unknown", lowPowerModeEnabled: false }
|
||||
// Covers: battery not requested (filtered out), provider threw (SafeCollect
|
||||
// returned { error }), or battery is null.
|
||||
{
|
||||
var hasBattery = result.TryGetValue("battery", out var batteryVal) && batteryVal != null;
|
||||
var isError = hasBattery && batteryVal!.GetType().GetProperty("error") != null;
|
||||
|
||||
if (!hasBattery || isError)
|
||||
battery = new
|
||||
{
|
||||
string? errorMsg = null;
|
||||
if (isError)
|
||||
{
|
||||
var errProp = batteryVal!.GetType().GetProperty("error")!.GetValue(batteryVal);
|
||||
errorMsg = errProp?.ToString();
|
||||
}
|
||||
|
||||
result["battery"] = new
|
||||
{
|
||||
level = (double?)null,
|
||||
state = "unknown",
|
||||
lowPowerModeEnabled = false,
|
||||
error = errorMsg
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Legacy fields preserved for backward compatibility with existing consumers.
|
||||
result["thermal"] = new { state = "nominal" };
|
||||
result["storage"] = SafeCollect("storage", () => GetStorageStatus());
|
||||
result["network"] = SafeCollect("network", () => GetNetworkStatus());
|
||||
result["uptimeSeconds"] = Environment.TickCount64 / 1000.0;
|
||||
|
||||
return Success(result);
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs one section collector and shields the caller from its failure: if the
/// collector throws, the failure is logged as a warning and an object with a
/// single <c>error</c> property carrying the exception message is returned instead.
/// </summary>
/// <param name="section">Section name, used only in the warning message.</param>
/// <param name="collector">Delegate that produces the section's data.</param>
private object? SafeCollect(string section, Func<object> collector)
{
    object? outcome;
    try
    {
        outcome = collector();
    }
    catch (Exception failure)
    {
        Logger.Warn($"device.status: {section} collection failed: {failure.Message}");
        outcome = new { error = failure.Message };
    }

    return outcome;
}
|
||||
|
||||
/// <summary>
/// Asynchronous counterpart of SafeCollect: awaits one section collector and, if
/// it throws, logs a warning and returns an object with a single <c>error</c>
/// property carrying the exception message.
/// </summary>
/// <param name="section">Section name, used only in the warning message.</param>
/// <param name="collector">Delegate that starts the asynchronous collection.</param>
private async Task<object?> SafeCollectAsync(string section, Func<Task<object>> collector)
{
    object? outcome;
    try
    {
        outcome = await collector();
    }
    catch (Exception failure)
    {
        Logger.Warn($"device.status: {section} collection failed: {failure.Message}");
        outcome = new { error = failure.Message };
    }

    return outcome;
}
|
||||
|
||||
/// <summary>
|
||||
/// Wraps the provider's battery result with legacy fields (level, state, lowPowerModeEnabled)
|
||||
/// so old consumers that read battery.level / battery.state continue to work.
|
||||
/// </summary>
|
||||
private static object WrapBatteryWithLegacyFields(object providerResult)
|
||||
{
|
||||
// Serialize the provider result to a dictionary so we can merge legacy fields.
|
||||
var json = System.Text.Json.JsonSerializer.Serialize(providerResult);
|
||||
var dict = System.Text.Json.JsonSerializer.Deserialize<Dictionary<string, System.Text.Json.JsonElement>>(json)
|
||||
?? new Dictionary<string, System.Text.Json.JsonElement>();
|
||||
|
||||
// Map new fields to legacy equivalents.
|
||||
double? level = null;
|
||||
if (dict.TryGetValue("chargePercent", out var cp) && cp.ValueKind == System.Text.Json.JsonValueKind.Number)
|
||||
level = cp.GetDouble();
|
||||
|
||||
var isCharging = dict.TryGetValue("isCharging", out var ic)
|
||||
&& ic.ValueKind == System.Text.Json.JsonValueKind.True;
|
||||
|
||||
var state = isCharging ? "charging" : (level.HasValue ? "discharging" : "unknown");
|
||||
|
||||
var result = new Dictionary<string, object?>
|
||||
{
|
||||
// Legacy fields
|
||||
["level"] = level,
|
||||
["state"] = state,
|
||||
["lowPowerModeEnabled"] = false,
|
||||
};
|
||||
|
||||
// Merge all new fields from provider
|
||||
foreach (var kv in dict)
|
||||
result[kv.Key] = kv.Value;
|
||||
|
||||
return result;
|
||||
level = (double?)null,
|
||||
state = "unknown",
|
||||
lowPowerModeEnabled = false
|
||||
},
|
||||
thermal = new
|
||||
{
|
||||
state = "nominal"
|
||||
},
|
||||
storage,
|
||||
network,
|
||||
uptimeSeconds = Environment.TickCount64 / 1000.0
|
||||
});
|
||||
}
|
||||
|
||||
private static string GetModelIdentifier()
|
||||
@ -210,59 +96,67 @@ public class DeviceCapability : NodeCapabilityBase
|
||||
return $"{RuntimeInformation.OSArchitecture}".ToLowerInvariant();
|
||||
}
|
||||
|
||||
#region Legacy helpers (backward compat)
|
||||
|
||||
private static object GetStorageStatus()
|
||||
private static object GetStorageStatus(IOpenClawLogger logger)
|
||||
{
|
||||
var root = Path.GetPathRoot(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile))
|
||||
?? Path.GetPathRoot(AppContext.BaseDirectory)
|
||||
?? string.Empty;
|
||||
var drive = !string.IsNullOrWhiteSpace(root)
|
||||
? new DriveInfo(root)
|
||||
: DriveInfo.GetDrives().FirstOrDefault(d => d.IsReady);
|
||||
|
||||
if (drive is { IsReady: true })
|
||||
try
|
||||
{
|
||||
var totalBytes = drive.TotalSize;
|
||||
var freeBytes = drive.AvailableFreeSpace;
|
||||
return new
|
||||
var root = Path.GetPathRoot(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile))
|
||||
?? Path.GetPathRoot(AppContext.BaseDirectory)
|
||||
?? string.Empty;
|
||||
var drive = !string.IsNullOrWhiteSpace(root)
|
||||
? new DriveInfo(root)
|
||||
: DriveInfo.GetDrives().FirstOrDefault(d => d.IsReady);
|
||||
|
||||
if (drive is { IsReady: true })
|
||||
{
|
||||
totalBytes,
|
||||
freeBytes,
|
||||
usedBytes = Math.Max(0, totalBytes - freeBytes)
|
||||
};
|
||||
var totalBytes = drive.TotalSize;
|
||||
var freeBytes = drive.AvailableFreeSpace;
|
||||
return new
|
||||
{
|
||||
totalBytes,
|
||||
freeBytes,
|
||||
usedBytes = Math.Max(0, totalBytes - freeBytes)
|
||||
};
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.Warn($"device.status: storage status unavailable: {ex.Message}");
|
||||
}
|
||||
|
||||
return new { totalBytes = 0L, freeBytes = 0L, usedBytes = 0L };
|
||||
return new
|
||||
{
|
||||
totalBytes = 0L,
|
||||
freeBytes = 0L,
|
||||
usedBytes = 0L
|
||||
};
|
||||
}
|
||||
|
||||
private static object GetNetworkStatus()
|
||||
private static object GetNetworkStatus(IOpenClawLogger logger)
|
||||
{
|
||||
string[] interfaces;
|
||||
var interfaces = Array.Empty<string>();
|
||||
try
|
||||
{
|
||||
interfaces = NetworkInterface.GetAllNetworkInterfaces()
|
||||
.Where(nic => nic.OperationalStatus == OperationalStatus.Up)
|
||||
.Select(nic => nic.NetworkInterfaceType switch
|
||||
{
|
||||
NetworkInterfaceType.Wireless80211 => "wifi",
|
||||
NetworkInterfaceType.Ethernet
|
||||
or NetworkInterfaceType.GigabitEthernet
|
||||
or NetworkInterfaceType.FastEthernetFx
|
||||
or NetworkInterfaceType.FastEthernetT => "wired",
|
||||
NetworkInterfaceType.Ppp
|
||||
or NetworkInterfaceType.Wwanpp
|
||||
or NetworkInterfaceType.Wwanpp2 => "cellular",
|
||||
_ => "other"
|
||||
})
|
||||
.Select(MapInterfaceType)
|
||||
.Distinct(StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
}
|
||||
catch { interfaces = []; }
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.Warn($"device.status: network interfaces unavailable: {ex.Message}");
|
||||
}
|
||||
|
||||
bool isAvailable;
|
||||
try { isAvailable = NetworkInterface.GetIsNetworkAvailable(); }
|
||||
catch { isAvailable = false; }
|
||||
var isAvailable = false;
|
||||
try
|
||||
{
|
||||
isAvailable = NetworkInterface.GetIsNetworkAvailable();
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.Warn($"device.status: network availability unavailable: {ex.Message}");
|
||||
}
|
||||
|
||||
return new
|
||||
{
|
||||
@ -273,5 +167,19 @@ public class DeviceCapability : NodeCapabilityBase
|
||||
};
|
||||
}
|
||||
|
||||
#endregion
|
||||
/// <summary>
/// Maps a network interface's <see cref="NetworkInterfaceType"/> to one of the
/// coarse labels "wifi", "wired", "cellular" or "other".
/// </summary>
private static string MapInterfaceType(NetworkInterface nic)
{
    switch (nic.NetworkInterfaceType)
    {
        case NetworkInterfaceType.Wireless80211:
            return "wifi";

        case NetworkInterfaceType.Ethernet:
        case NetworkInterfaceType.GigabitEthernet:
        case NetworkInterfaceType.FastEthernetFx:
        case NetworkInterfaceType.FastEthernetT:
            return "wired";

        case NetworkInterfaceType.Ppp:
        case NetworkInterfaceType.Wwanpp:
        case NetworkInterfaceType.Wwanpp2:
            return "cellular";

        default:
            return "other";
    }
}
|
||||
}
|
||||
|
||||
@ -64,7 +64,7 @@ public class LocationCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("location.get failed", ex);
|
||||
return Error("Location failed");
|
||||
return Error($"Location failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,22 +37,15 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
};
|
||||
}
|
||||
|
||||
// Clamp bounds — reject extreme caller values before any work starts.
|
||||
private const int MinDimension = 16;
|
||||
private const int MaxScreenWidth = 7680; // 8K horizontal
|
||||
private const int MinQuality = 1;
|
||||
private const int MaxQuality = 100;
|
||||
private const int MaxScreenIndex = 32; // far above any plausible monitor count
|
||||
|
||||
private async Task<NodeInvokeResponse> HandleCaptureAsync(NodeInvokeRequest request)
|
||||
{
|
||||
var format = GetStringArg(request.Args, "format", "png");
|
||||
var maxWidth = Clamp(GetIntArg(request.Args, "maxWidth", 1920), MinDimension, MaxScreenWidth);
|
||||
var quality = Clamp(GetIntArg(request.Args, "quality", 80), MinQuality, MaxQuality);
|
||||
var maxWidth = GetIntArg(request.Args, "maxWidth", 1920);
|
||||
var quality = GetIntArg(request.Args, "quality", 80);
|
||||
var monitor = GetIntArg(request.Args, "monitor", 0);
|
||||
var screenIndex = Clamp(GetIntArg(request.Args, "screenIndex", monitor), 0, MaxScreenIndex);
|
||||
var screenIndex = GetIntArg(request.Args, "screenIndex", monitor);
|
||||
var includePointer = GetBoolArg(request.Args, "includePointer", true);
|
||||
|
||||
|
||||
Logger.Info($"screen.snapshot: format={format}, maxWidth={maxWidth}, monitor={screenIndex}");
|
||||
|
||||
if (CaptureRequested == null)
|
||||
@ -84,7 +77,7 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Screen capture failed", ex);
|
||||
return Error("Capture failed");
|
||||
return Error($"Capture failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,10 +90,9 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
return Error("Unsupported screen recording format. Only mp4 is supported.");
|
||||
}
|
||||
|
||||
var durationMs = Clamp(GetIntArg(request.Args, "durationMs", 10000), 100, MaxRecordDurationMs);
|
||||
var fpsRaw = GetDoubleArg(request.Args, "fps", 10);
|
||||
var fps = fpsRaw < 1 ? 1 : (fpsRaw > 60 ? 60 : fpsRaw);
|
||||
var screenIndex = Clamp(GetIntArg(request.Args, "screenIndex", 0), 0, MaxScreenIndex);
|
||||
var durationMs = GetIntArg(request.Args, "durationMs", 10000);
|
||||
var fps = GetDoubleArg(request.Args, "fps", 10);
|
||||
var screenIndex = GetIntArg(request.Args, "screenIndex", 0);
|
||||
var includeAudio = GetBoolArg(request.Args, "includeAudio", false);
|
||||
|
||||
Logger.Info($"screen.record: durationMs={durationMs}, fps={fps}, screenIndex={screenIndex}, includeAudio={includeAudio}");
|
||||
@ -134,15 +126,10 @@ public class ScreenCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("Screen recording failed", ex);
|
||||
return Error("Recording failed");
|
||||
return Error($"Recording failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
private const int MaxRecordDurationMs = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
/// <summary>
/// Restricts <paramref name="value"/> to the range [<paramref name="min"/>,
/// <paramref name="max"/>]. The lower bound is tested first, so a value below
/// <paramref name="min"/> always yields <paramref name="min"/>.
/// </summary>
private static int Clamp(int value, int min, int max)
{
    if (value < min)
    {
        return min;
    }

    if (value > max)
    {
        return max;
    }

    return value;
}
|
||||
|
||||
private static double GetDoubleArg(System.Text.Json.JsonElement args, string name, double defaultValue)
|
||||
{
|
||||
if (args.ValueKind == System.Text.Json.JsonValueKind.Undefined ||
|
||||
|
||||
@ -1,339 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
|
||||
/// Speech-to-text node capability. Three commands:
|
||||
///
|
||||
/// * <see cref="TranscribeCommand"/> — bounded fixed-duration capture + transcription.
|
||||
/// Caller must specify <c>maxDurationMs</c> (capped at <see cref="MaxTranscribeDurationMs"/>).
|
||||
/// Useful for quick "give me 5 seconds of audio" prompts.
|
||||
///
|
||||
/// * <see cref="ListenCommand"/> — VAD-driven capture that returns when speech ends
|
||||
/// or after <c>timeoutMs</c> (default <see cref="DefaultListenTimeoutMs"/>, range
|
||||
/// <see cref="MinListenTimeoutMs"/>..<see cref="MaxListenTimeoutMs"/>).
|
||||
/// Useful for conversational "listen until I stop talking" prompts.
|
||||
///
|
||||
/// * <see cref="StatusCommand"/> — reports engine readiness (no PII).
|
||||
///
|
||||
/// The actual engine lives in the tray (Whisper.net + NAudio + Silero VAD).
|
||||
/// Whisper is local-first and privacy-respecting; the legacy WinRT
|
||||
/// <c>SpeechRecognizer</c> + desktop SAPI fallback was removed because both
|
||||
/// stacks are old, can leak audio to the Microsoft cloud (online-speech),
|
||||
/// and don't work in unpackaged builds.
|
||||
///
|
||||
/// **Privacy invariants for the response surface:**
|
||||
/// - Validation errors never echo the caller-supplied language string.
|
||||
/// - Handler exceptions never propagate their <c>Message</c> into the response;
|
||||
/// full detail stays in the local logger only. This is critical because
|
||||
/// failed-invoke errors land in recent activity / support bundles.
|
||||
/// - <see cref="StatusCommand"/> response carries no PII (no transcript fragments,
|
||||
/// no language history, no device IDs, no model paths).
|
||||
/// </summary>
|
||||
public sealed class SttCapability : NodeCapabilityBase
|
||||
{
|
||||
public const string TranscribeCommand = "stt.transcribe";
|
||||
public const string ListenCommand = "stt.listen";
|
||||
public const string StatusCommand = "stt.status";
|
||||
|
||||
public const int MaxTranscribeDurationMs = 30_000;
|
||||
public const int MinListenTimeoutMs = 1_000;
|
||||
public const int MaxListenTimeoutMs = 120_000;
|
||||
public const int DefaultListenTimeoutMs = 30_000;
|
||||
|
||||
public const string DefaultLanguage = "en-US";
|
||||
public const string AutoLanguage = "auto";
|
||||
|
||||
/// <summary>
|
||||
/// Engine identifier returned in <c>engineEffective</c> on every successful
|
||||
/// stt.* response. Currently always <c>"whisper"</c>; the field exists so
|
||||
/// adding a future engine doesn't break the wire shape.
|
||||
/// </summary>
|
||||
public const string EngineWhisper = "whisper";
|
||||
|
||||
private static readonly string[] _commands = [TranscribeCommand, ListenCommand, StatusCommand];
|
||||
|
||||
// Conservative BCP-47 check: 2-3 letter language, optional script
|
||||
// (4 letter), optional region (2 letter or 3 digit), each separated
|
||||
// by a hyphen. Rejects whitespace and punctuation that would otherwise
|
||||
// trip Windows.Globalization.Language ctor. The literal "auto"
|
||||
// sentinel is accepted in addition (Whisper supports auto-detect).
|
||||
private static readonly Regex BcpTagRegex = new(
|
||||
"^[A-Za-z]{2,3}(?:-[A-Za-z]{4})?(?:-(?:[A-Za-z]{2}|[0-9]{3}))?$",
|
||||
RegexOptions.Compiled);
|
||||
|
||||
public override string Category => "stt";
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="TranscribeCommand"/>: bounded fixed-duration
|
||||
/// capture + transcription.
|
||||
/// </summary>
|
||||
public event Func<SttTranscribeArgs, CancellationToken, Task<SttTranscribeResult>>? TranscribeRequested;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="ListenCommand"/>: VAD-driven capture that
|
||||
/// returns on end-of-speech or after <c>timeoutMs</c>.
|
||||
/// </summary>
|
||||
public event Func<SttListenArgs, CancellationToken, Task<SttListenResult>>? ListenRequested;
|
||||
|
||||
/// <summary>
|
||||
/// Tray-side handler for <see cref="StatusCommand"/>: returns per-engine readiness.
|
||||
/// </summary>
|
||||
public event Func<CancellationToken, Task<SttStatusResult>>? StatusRequested;
|
||||
|
||||
public SttCapability(IOpenClawLogger logger) : base(logger) { }
|
||||
|
||||
/// <summary>
/// Trims and validates a single language tag.
/// Public so UI surfaces can validate against the same rule the wire applies.
/// </summary>
/// <param name="tag">Candidate tag; surrounding whitespace is ignored.</param>
/// <returns>
/// The <see cref="AutoLanguage"/> sentinel for a case-insensitive "auto",
/// the trimmed tag when it matches the BCP-47 subset accepted by this class,
/// or <c>null</c> when the input is null or matches neither.
/// </returns>
public static string? NormalizeLanguageTag(string tag)
{
    // Nullable annotations are not enforced at runtime; a null coming from a UI
    // binding is reported as "invalid tag" rather than a NullReferenceException.
    if (tag is null)
    {
        return null;
    }

    var trimmed = tag.Trim();
    if (string.Equals(trimmed, AutoLanguage, StringComparison.OrdinalIgnoreCase))
    {
        return AutoLanguage;
    }

    return BcpTagRegex.IsMatch(trimmed) ? trimmed : null;
}
|
||||
|
||||
/// <summary>
/// Picks the language for a recognition call. Precedence: the per-call
/// <paramref name="requested"/> value, then the <paramref name="configured"/>
/// setting, then <see cref="DefaultLanguage"/>; null, empty and whitespace-only
/// values are skipped.
/// </summary>
/// <returns>
/// The normalized tag, or <c>null</c> when the chosen candidate fails validation.
/// </returns>
public static string? ResolveLanguage(string? requested, string? configured)
{
    string? chosen;
    if (!string.IsNullOrWhiteSpace(requested))
    {
        chosen = requested;
    }
    else if (!string.IsNullOrWhiteSpace(configured))
    {
        chosen = configured;
    }
    else
    {
        chosen = DefaultLanguage;
    }

    return NormalizeLanguageTag(chosen!);
}
|
||||
|
||||
/// <summary>
/// Token-less entry point: forwards to the cancellable overload with
/// <see cref="CancellationToken.None"/>.
/// </summary>
public override Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
{
    return ExecuteAsync(request, CancellationToken.None);
}
|
||||
|
||||
/// <summary>
/// Dispatches an stt.* request to its handler by command name. Unrecognized
/// commands produce an "Unknown command" error response.
/// </summary>
/// <param name="request">Incoming invoke request.</param>
/// <param name="cancellationToken">Passed through to the selected handler.</param>
public override async Task<NodeInvokeResponse> ExecuteAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    switch (request.Command)
    {
        case TranscribeCommand:
            return await HandleTranscribeAsync(request, cancellationToken).ConfigureAwait(false);

        case ListenCommand:
            return await HandleListenAsync(request, cancellationToken).ConfigureAwait(false);

        case StatusCommand:
            return await HandleStatusAsync(cancellationToken).ConfigureAwait(false);

        default:
            return Error($"Unknown command: {request.Command}");
    }
}
|
||||
|
||||
/// <summary>
/// Handles stt.transcribe: validates <c>maxDurationMs</c> and the optional
/// <c>language</c> argument, then forwards the request to the
/// <see cref="TranscribeRequested"/> handler and shapes its result.
/// </summary>
/// <param name="request">Invoke request whose Args carry maxDurationMs and language.</param>
/// <param name="cancellationToken">Passed through to the handler.</param>
/// <returns>
/// A success response with the transcription fields, or an error response for
/// invalid arguments, a missing handler, cancellation, or handler failure.
/// </returns>
private async Task<NodeInvokeResponse> HandleTranscribeAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    // maxDurationMs is required and bounded server-side. We deliberately
    // reject 0/negative rather than substituting a default — callers
    // explicitly choose how much mic time they're spending.
    var maxDurationMs = GetIntArg(request.Args, "maxDurationMs", 0);
    if (maxDurationMs <= 0)
    {
        return Error("Missing required maxDurationMs");
    }

    if (maxDurationMs > MaxTranscribeDurationMs)
    {
        return Error($"maxDurationMs exceeds {MaxTranscribeDurationMs} ms");
    }

    var requestedLanguage = GetStringArg(request.Args, "language");
    string? resolvedLanguage = null;
    if (!string.IsNullOrWhiteSpace(requestedLanguage))
    {
        resolvedLanguage = NormalizeLanguageTag(requestedLanguage);
        if (resolvedLanguage == null)
        {
            // Privacy: the rejected caller-supplied string is not echoed back.
            return Error("Invalid language tag");
        }
    }

    // Snapshot the event once: reading it again at the call site would race with
    // an unsubscribe and turn "not available" into a NullReferenceException.
    var handler = TranscribeRequested;
    if (handler == null)
    {
        return Error("STT transcribe not available");
    }

    var args = new SttTranscribeArgs
    {
        MaxDurationMs = maxDurationMs,
        Language = resolvedLanguage // null lets the tray fall back to its configured setting
    };

    Logger.Info($"stt.transcribe: maxDurationMs={args.MaxDurationMs}, language={args.Language ?? "(default)"}");

    try
    {
        var result = await handler(args, cancellationToken).ConfigureAwait(false);
        return Success(new
        {
            transcribed = result.Transcribed,
            text = result.Text,
            durationMs = result.DurationMs,
            language = result.Language,
            engineEffective = result.EngineEffective
        });
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        return Error("Transcribe canceled");
    }
    catch (Exception ex)
    {
        // Privacy: never echo raw exception text into the response. The
        // exception flows through the failed-invoke path and may be
        // persisted to recent activity / support bundles. Full detail
        // stays in the local log only.
        Logger.Error("STT transcribe failed", ex);
        return Error("Transcribe failed");
    }
}
|
||||
|
||||
/// <summary>
/// Handles a listen request (logged as <c>stt.listen</c>): clamps the
/// caller's timeout, resolves the language tag, forwards the call to the
/// <c>ListenRequested</c> handler and maps its result onto the response.
/// </summary>
/// <param name="request">Invoke request; reads the optional <c>timeoutMs</c> and <c>language</c> args.</param>
/// <param name="cancellationToken">Token passed through to the handler.</param>
/// <returns>A success response carrying the handler's result, or a fixed-text error.</returns>
private async Task<NodeInvokeResponse> HandleListenAsync(
    NodeInvokeRequest request,
    CancellationToken cancellationToken)
{
    // timeoutMs is optional; whatever the caller sends is forced into
    // [MinListenTimeoutMs, MaxListenTimeoutMs] so a hostile caller can't
    // pin the mic open for an hour.
    var timeoutMs = GetIntArg(request.Args, "timeoutMs", DefaultListenTimeoutMs);
    timeoutMs = timeoutMs < MinListenTimeoutMs ? MinListenTimeoutMs : timeoutMs;
    timeoutMs = timeoutMs > MaxListenTimeoutMs ? MaxListenTimeoutMs : timeoutMs;

    // A blank/missing language keeps the auto sentinel; anything else must normalize.
    string language = AutoLanguage;
    var languageArg = GetStringArg(request.Args, "language");
    if (!string.IsNullOrWhiteSpace(languageArg))
    {
        if (NormalizeLanguageTag(languageArg) is not { } tag)
            return Error("Invalid language tag");

        language = tag;
    }

    var handler = ListenRequested;
    if (handler is null)
        return Error("STT listen not available");

    var args = new SttListenArgs
    {
        TimeoutMs = timeoutMs,
        Language = language
    };

    Logger.Info($"stt.listen: timeoutMs={timeoutMs}, language={language}");

    try
    {
        var outcome = await handler(args, cancellationToken).ConfigureAwait(false);
        var payload = new
        {
            text = outcome.Text,
            language = outcome.Language,
            durationMs = outcome.DurationMs,
            segments = outcome.Segments,
            engineEffective = outcome.EngineEffective
        };
        return Success(payload);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        return Error("Listen canceled");
    }
    catch (Exception ex)
    {
        // Privacy: the raw exception text stays in the local log; the
        // response only ever carries a fixed message.
        Logger.Error("STT listen failed", ex);
        return Error("Listen failed");
    }
}
|
||||
|
||||
/// <summary>
/// Handles a status request by asking the <c>StatusRequested</c> handler
/// for the current engine state and mapping it onto the response.
/// </summary>
/// <param name="cancellationToken">Token passed through to the handler.</param>
/// <returns>
/// A success response with engine, readiness, download progress and
/// capability flags, or a fixed-text error when no handler is attached,
/// the call is canceled, or the handler throws.
/// </returns>
private async Task<NodeInvokeResponse> HandleStatusAsync(CancellationToken cancellationToken)
{
    var handler = StatusRequested;
    if (handler == null)
        return Error("STT status not available");

    try
    {
        var result = await handler(cancellationToken).ConfigureAwait(false);
        return Success(new
        {
            engine = result.Engine,
            readiness = result.Readiness,
            modelDownloadProgress = result.ModelDownloadProgress,
            isListenWithVadSupported = result.IsListenWithVadSupported,
            isBoundedTranscribeSupported = result.IsBoundedTranscribeSupported
        });
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        // Caller-initiated cancellation is not a failure: report it the same
        // way the transcribe/listen handlers do instead of logging an error.
        return Error("Status canceled");
    }
    catch (Exception ex)
    {
        // Status must not leak engine internals; carry only a fixed message.
        Logger.Error("STT status failed", ex);
        return Error("Status failed");
    }
}
|
||||
}
|
||||
|
||||
public sealed class SttTranscribeArgs
|
||||
{
|
||||
public int MaxDurationMs { get; set; }
|
||||
/// <summary>
|
||||
/// BCP-47 tag (e.g., "en-US"), the literal "auto" sentinel, or null
|
||||
/// to let the tray fall back to its configured <c>SttLanguage</c> setting.
|
||||
/// </summary>
|
||||
public string? Language { get; set; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Result object returned by the transcribe handler; each property is
/// copied into the transcribe response under its camel-cased name.
/// </summary>
public sealed class SttTranscribeResult
{
    /// <summary>Returned to the caller as <c>transcribed</c>.</summary>
    public bool Transcribed { get; set; }

    /// <summary>Returned to the caller as <c>text</c>; empty by default.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Returned to the caller as <c>durationMs</c>.</summary>
    public int DurationMs { get; set; }

    /// <summary>Returned to the caller as <c>language</c>; starts at <see cref="SttCapability.DefaultLanguage"/>.</summary>
    public string Language { get; set; } = SttCapability.DefaultLanguage;

    /// <summary>
    /// Engine that served this call. Always <see cref="SttCapability.EngineWhisper"/>
    /// today; the field exists so a future engine doesn't break the wire.
    /// </summary>
    public string EngineEffective { get; set; } = SttCapability.EngineWhisper;
}
|
||||
|
||||
public sealed class SttListenArgs
|
||||
{
|
||||
public int TimeoutMs { get; set; }
|
||||
/// <summary>
|
||||
/// BCP-47 tag (e.g., "en-US"), or the literal "auto" sentinel
|
||||
/// (default; lets Whisper auto-detect).
|
||||
/// </summary>
|
||||
public string Language { get; set; } = SttCapability.AutoLanguage;
|
||||
}
|
||||
|
||||
/// <summary>
/// Result object returned by the listen handler; each property is copied
/// into the listen response under its camel-cased name.
/// </summary>
public sealed class SttListenResult
{
    /// <summary>Returned to the caller as <c>text</c>; empty by default.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Returned to the caller as <c>language</c>; starts at <see cref="SttCapability.AutoLanguage"/>.</summary>
    public string Language { get; set; } = SttCapability.AutoLanguage;

    /// <summary>Returned to the caller as <c>durationMs</c>.</summary>
    public int DurationMs { get; set; }

    /// <summary>Returned to the caller as <c>segments</c>; never null by default (empty list).</summary>
    public IReadOnlyList<SttSegment> Segments { get; set; } = Array.Empty<SttSegment>();

    /// <summary>Returned to the caller as <c>engineEffective</c>; starts at <see cref="SttCapability.EngineWhisper"/>.</summary>
    public string EngineEffective { get; set; } = SttCapability.EngineWhisper;
}
|
||||
|
||||
/// <summary>
/// One entry of <c>SttListenResult.Segments</c>: a piece of text with a
/// start and end offset in milliseconds.
/// </summary>
public sealed class SttSegment
{
    /// <summary>Segment text; empty by default.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Start offset in milliseconds.</summary>
    public int StartMs { get; set; }

    /// <summary>End offset in milliseconds.</summary>
    public int EndMs { get; set; }
}
|
||||
|
||||
public sealed class SttStatusResult
|
||||
{
|
||||
public string Engine { get; set; } = SttCapability.EngineWhisper;
|
||||
|
||||
/// <summary>One of "ready", "initializing", "model-downloading", "model-not-downloaded", "unavailable".</summary>
|
||||
public string Readiness { get; set; } = "unavailable";
|
||||
|
||||
/// <summary>0..1 download progress when <see cref="Readiness"/> == "model-downloading"; null otherwise.</summary>
|
||||
public double? ModelDownloadProgress { get; set; }
|
||||
|
||||
public bool IsListenWithVadSupported { get; set; }
|
||||
public bool IsBoundedTranscribeSupported { get; set; }
|
||||
}
|
||||
@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
@ -13,10 +12,7 @@ namespace OpenClaw.Shared.Capabilities;
|
||||
public class SystemCapability : NodeCapabilityBase
|
||||
{
|
||||
public override string Category => "system";
|
||||
|
||||
private const int DefaultRunTimeoutMs = 30_000;
|
||||
private const int MaxRunTimeoutMs = 600_000; // 10 minutes
|
||||
|
||||
|
||||
private static readonly string[] _commands = new[]
|
||||
{
|
||||
"system.notify",
|
||||
@ -26,26 +22,6 @@ public class SystemCapability : NodeCapabilityBase
|
||||
"system.execApprovals.get",
|
||||
"system.execApprovals.set"
|
||||
};
|
||||
|
||||
private static readonly string[] DangerousAllowPatternFragments =
|
||||
[
|
||||
"remove-item",
|
||||
"rm ",
|
||||
"del ",
|
||||
"erase ",
|
||||
"rd ",
|
||||
"rmdir ",
|
||||
"format-",
|
||||
"stop-computer",
|
||||
"restart-computer",
|
||||
"shutdown",
|
||||
"invoke-webrequest",
|
||||
"invoke-restmethod",
|
||||
"start-process",
|
||||
"set-executionpolicy",
|
||||
"reg ",
|
||||
"net "
|
||||
];
|
||||
|
||||
public override IReadOnlyList<string> Commands => _commands;
|
||||
|
||||
@ -57,7 +33,6 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
// Exec approval policy (optional - if null, all commands are allowed)
|
||||
private ExecApprovalPolicy? _approvalPolicy;
|
||||
private IExecApprovalPromptHandler? _promptHandler;
|
||||
|
||||
// V2 exec approval handler (null = legacy path; inert until explicitly set)
|
||||
private IExecApprovalV2Handler? _v2Handler;
|
||||
@ -82,11 +57,6 @@ public class SystemCapability : NodeCapabilityBase
|
||||
_approvalPolicy = policy;
|
||||
}
|
||||
|
||||
/// <summary>
/// Installs the handler consulted when the exec policy answers with a
/// prompt action; replaces any previously set handler.
/// </summary>
/// <param name="promptHandler">Handler stored for later approval prompts.</param>
public void SetPromptHandler(IExecApprovalPromptHandler promptHandler) =>
    _promptHandler = promptHandler;
|
||||
|
||||
/// <summary>
|
||||
/// Install a V2 exec approval handler. When set, system.run routes to the V2 path
|
||||
/// instead of the legacy path. The V2 path is inert until this is called.
|
||||
@ -271,7 +241,7 @@ public class SystemCapability : NodeCapabilityBase
|
||||
{
|
||||
// Rail 1: no silent fallback — handler exceptions become typed denies.
|
||||
Logger.Error($"[system.run] corr={correlationId} path=v2 handler threw", ex);
|
||||
v2Result = ExecApprovalV2Result.ValidationFailed("Handler exception");
|
||||
v2Result = ExecApprovalV2Result.ValidationFailed($"Handler exception: {ex.Message}");
|
||||
}
|
||||
|
||||
Logger.Info($"[system.run] corr={correlationId} decision={v2Result.Code} reason={v2Result.Reason}");
|
||||
@ -316,15 +286,8 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
var shell = GetStringArg(request.Args, "shell");
|
||||
var cwd = GetStringArg(request.Args, "cwd");
|
||||
var timeoutMs = GetIntArg(request.Args, "timeoutMs",
|
||||
GetIntArg(request.Args, "timeout", DefaultRunTimeoutMs));
|
||||
// Clamp caller-supplied timeouts. timeoutMs <= 0 historically meant
|
||||
// "wait forever" inside LocalCommandRunner; that lets a wedged process
|
||||
// pin a handler slot indefinitely, so we coerce to the default. The
|
||||
// upper bound is generous but prevents a multi-day timeout request
|
||||
// from accidentally outliving the tray.
|
||||
if (timeoutMs <= 0) timeoutMs = DefaultRunTimeoutMs;
|
||||
if (timeoutMs > MaxRunTimeoutMs) timeoutMs = MaxRunTimeoutMs;
|
||||
var timeoutMs = GetIntArg(request.Args, "timeoutMs",
|
||||
GetIntArg(request.Args, "timeout", 30000));
|
||||
|
||||
// Parse env dict if present
|
||||
Dictionary<string, string>? env = null;
|
||||
@ -365,7 +328,7 @@ public class SystemCapability : NodeCapabilityBase
|
||||
if (_approvalPolicy != null)
|
||||
{
|
||||
var approval = _approvalPolicy.Evaluate(fullCommand, shell);
|
||||
if (!await EnsureApprovedAsync(fullCommand, shell, approval))
|
||||
if (!approval.Allowed)
|
||||
{
|
||||
Logger.Warn($"system.run DENIED: {fullCommand} ({approval.Reason})");
|
||||
return Error($"Command denied by exec policy: {approval.Reason}");
|
||||
@ -381,7 +344,7 @@ public class SystemCapability : NodeCapabilityBase
|
||||
foreach (var target in parseResult.Targets)
|
||||
{
|
||||
var innerApproval = _approvalPolicy.Evaluate(target.Command, target.Shell);
|
||||
if (!await EnsureApprovedAsync(target.Command, target.Shell, innerApproval))
|
||||
if (!innerApproval.Allowed)
|
||||
{
|
||||
Logger.Warn($"system.run DENIED: {target.Command} ({innerApproval.Reason})");
|
||||
return Error($"Command denied by exec policy: {innerApproval.Reason}");
|
||||
@ -413,62 +376,9 @@ public class SystemCapability : NodeCapabilityBase
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("system.run failed", ex);
|
||||
return Error("Execution failed");
|
||||
return Error($"Execution failed: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Turns a policy evaluation into a final allow/deny answer, asking the
/// prompt handler when the policy result is a prompt.
/// </summary>
/// <param name="command">Command text that was evaluated.</param>
/// <param name="shell">Shell the command targets, if any.</param>
/// <param name="approval">Result of evaluating <paramref name="command"/> against the policy.</param>
/// <param name="cancellationToken">Token forwarded to the prompt handler.</param>
/// <returns>
/// <c>true</c> when the policy already allowed the command or the prompt
/// answered anything other than deny; <c>false</c> when the command is
/// denied, or a prompt is required but no handler/policy is available.
/// </returns>
private async Task<bool> EnsureApprovedAsync(
    string command,
    string? shell,
    ExecApprovalResult approval,
    CancellationToken cancellationToken = default)
{
    if (approval.Allowed)
    {
        return true;
    }

    // Only a prompt action can be upgraded, and only when both the handler
    // and the policy (needed to persist "always allow") are present.
    if (approval.Action != ExecApprovalAction.Prompt || _promptHandler is null || _approvalPolicy is null)
    {
        return false;
    }

    var promptRequest = new ExecApprovalPromptRequest
    {
        Command = command,
        Shell = shell,
        MatchedPattern = approval.MatchedPattern,
        Reason = approval.Reason ?? "Command requires approval"
    };
    var decision = await _promptHandler.RequestAsync(promptRequest, cancellationToken);
    var kind = decision.Kind;

    if (kind == ExecApprovalPromptDecisionKind.Deny)
    {
        Logger.Warn($"system.run DENIED by prompt: {command} ({decision.Reason})");
        return false;
    }

    if (kind == ExecApprovalPromptDecisionKind.AlwaysAllow)
    {
        if (!CanPersistExactAllowRule(command))
        {
            // A command containing wildcard characters would be stored as a
            // pattern, so it is approved for this run only.
            Logger.Warn($"system.run prompt could not persist wildcard command; allowing once only: {command}");
        }
        else
        {
            var exactRule = new ExecApprovalRule
            {
                Pattern = command,
                Action = ExecApprovalAction.Allow,
                Shells = string.IsNullOrWhiteSpace(shell) ? null : [shell],
                Description = "Approved from Windows tray prompt"
            };
            // Inserted at index 0, ahead of every existing rule.
            _approvalPolicy.InsertRule(0, exactRule);
            Logger.Info($"system.run prompt persisted exact allow rule: {command}");
        }
    }

    Logger.Info($"system.run APPROVED by prompt: {command} ({decision.Kind})");
    return true;
}
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="command"/> can be stored verbatim as an
/// allow-rule pattern: it must be non-blank and contain neither
/// <c>*</c> nor <c>?</c>.
/// </summary>
private static bool CanPersistExactAllowRule(string command)
{
    if (string.IsNullOrWhiteSpace(command))
    {
        return false;
    }

    return !command.Contains('*') && !command.Contains('?');
}
|
||||
|
||||
private NodeInvokeResponse HandleExecApprovalsGet()
|
||||
{
|
||||
@ -478,7 +388,6 @@ public class SystemCapability : NodeCapabilityBase
|
||||
}
|
||||
|
||||
var data = _approvalPolicy.GetPolicyData();
|
||||
var policyHash = _approvalPolicy.GetPolicyHash();
|
||||
var rules = data.Rules;
|
||||
var rulesSummary = new object[rules.Count];
|
||||
for (var i = 0; i < rules.Count; i++)
|
||||
@ -497,16 +406,7 @@ public class SystemCapability : NodeCapabilityBase
|
||||
return Success(new
|
||||
{
|
||||
enabled = true,
|
||||
hash = policyHash,
|
||||
baseHash = policyHash,
|
||||
defaultAction = data.DefaultAction.ToString().ToLowerInvariant(),
|
||||
constraints = new
|
||||
{
|
||||
baseHashRequired = true,
|
||||
defaultAllowAllowed = false,
|
||||
broadAllowRulesAllowed = false,
|
||||
dangerousAllowRulesAllowed = false
|
||||
},
|
||||
rules = rulesSummary
|
||||
});
|
||||
}
|
||||
@ -520,19 +420,6 @@ public class SystemCapability : NodeCapabilityBase
|
||||
|
||||
try
|
||||
{
|
||||
var currentHash = _approvalPolicy.GetPolicyHash();
|
||||
if (!TryGetBaseHash(request.Args, out var baseHash))
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: baseHash is required");
|
||||
return Error("baseHash is required for exec approval policy updates. Refresh policy and retry.");
|
||||
}
|
||||
|
||||
if (!HashesMatch(baseHash, currentHash))
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: stale baseHash");
|
||||
return Error("Exec approval policy changed since it was loaded. Refresh policy and retry.");
|
||||
}
|
||||
|
||||
// Parse rules from args
|
||||
var rules = new List<ExecApprovalRule>();
|
||||
|
||||
@ -591,115 +478,18 @@ public class SystemCapability : NodeCapabilityBase
|
||||
_ => ExecApprovalAction.Deny
|
||||
};
|
||||
}
|
||||
|
||||
if (defaultAction == ExecApprovalAction.Allow)
|
||||
{
|
||||
Logger.Warn("execApprovals.set denied: default allow is not permitted");
|
||||
return Error("Default allow is not permitted for remote exec approval policy updates.");
|
||||
}
|
||||
|
||||
var validationError = ValidateExecApprovalRules(rules);
|
||||
if (validationError != null)
|
||||
{
|
||||
Logger.Warn($"execApprovals.set denied: {validationError}");
|
||||
return Error(validationError);
|
||||
}
|
||||
|
||||
|
||||
_approvalPolicy.SetRules(rules, defaultAction);
|
||||
var newHash = _approvalPolicy.GetPolicyHash();
|
||||
Logger.Info($"Exec approval policy updated: {rules.Count} rules");
|
||||
|
||||
return Success(new { updated = true, ruleCount = rules.Count, hash = newHash, baseHash = newHash });
|
||||
|
||||
return Success(new { updated = true, ruleCount = rules.Count });
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
Logger.Error("execApprovals.set failed", ex);
|
||||
return Error("Failed to update policy");
|
||||
return Error($"Failed to update policy: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks the allow rules of a proposed policy and rejects patterns that
/// are empty, match everything, or contain a fragment from
/// <c>DangerousAllowPatternFragments</c>. Non-allow rules are not inspected.
/// </summary>
/// <param name="rules">Rules to validate.</param>
/// <returns>The rejection message for the first offending rule, or <c>null</c> when all rules pass.</returns>
private static string? ValidateExecApprovalRules(IEnumerable<ExecApprovalRule> rules)
{
    // True when the text consists solely of wildcards and whitespace
    // (e.g. *, **, ?*, * ?), i.e. it would match any command.
    static bool IsWildcardOnly(string text)
    {
        foreach (var ch in text)
        {
            if (ch != '*' && ch != '?' && !char.IsWhiteSpace(ch))
            {
                return false;
            }
        }

        return true;
    }

    // True when the lower-cased pattern contains a dangerous fragment, or the
    // fragment's stem followed by a wildcard (e.g. "rm*" would otherwise slip
    // past "rm " because the wildcard stands in for the trailing space).
    static bool HitsDangerousFragment(string lowered)
    {
        foreach (var fragment in DangerousAllowPatternFragments)
        {
            if (lowered.Contains(fragment, StringComparison.Ordinal))
            {
                return true;
            }

            var stem = fragment.TrimEnd();
            var hasTrailingSpace = stem.Length < fragment.Length;
            if (hasTrailingSpace &&
                (lowered.Contains(stem + "*", StringComparison.Ordinal) ||
                 lowered.Contains(stem + "?", StringComparison.Ordinal)))
            {
                return true;
            }
        }

        return false;
    }

    foreach (var rule in rules)
    {
        if (rule.Action == ExecApprovalAction.Allow)
        {
            var pattern = rule.Pattern.Trim();
            if (pattern.Length == 0)
            {
                return "Empty allow rule patterns are not permitted.";
            }

            var lowered = pattern.ToLowerInvariant();

            if (IsWildcardOnly(lowered))
            {
                return $"Broad allow rule is not permitted: {pattern}";
            }

            // Shell-prefixed blanket patterns allow every command in that shell
            // (e.g. "powershell *" allows every PowerShell command).
            var isShellBlanket = lowered == "powershell *"
                || lowered == "pwsh *"
                || lowered == "cmd *"
                || lowered == "cmd.exe *";
            if (isShellBlanket)
            {
                return $"Broad allow rule is not permitted: {pattern}";
            }

            if (HitsDangerousFragment(lowered))
            {
                return $"Dangerous allow rule is not permitted: {pattern}";
            }
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Extracts the caller-supplied policy base hash from the request args,
/// accepting either the <c>baseHash</c> or the <c>base_hash</c> spelling.
/// </summary>
/// <param name="args">Request arguments; may be of any JSON kind.</param>
/// <param name="baseHash">
/// The string found under the first matching key, or an empty string when
/// no string-valued key is present.
/// </param>
/// <returns>
/// <c>true</c> when a non-blank string hash was found; <c>false</c> when
/// <paramref name="args"/> is not a JSON object, neither key holds a
/// string, or the first string-valued key is blank.
/// </returns>
private static bool TryGetBaseHash(System.Text.Json.JsonElement args, out string baseHash)
{
    baseHash = "";

    // JsonElement.TryGetProperty throws InvalidOperationException for anything
    // other than an object (null, array, string, ...), so reject those up front
    // rather than only the Undefined case.
    if (args.ValueKind != System.Text.Json.JsonValueKind.Object)
        return false;

    // camelCase is checked first; the first key holding a string decides the
    // outcome, even when that string is blank.
    foreach (var key in new[] { "baseHash", "base_hash" })
    {
        if (args.TryGetProperty(key, out var element) &&
            element.ValueKind == System.Text.Json.JsonValueKind.String)
        {
            baseHash = element.GetString() ?? "";
            return !string.IsNullOrWhiteSpace(baseHash);
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Compares a caller-supplied hash with the current policy hash,
/// ignoring case. The candidate may omit the <c>sha256:</c> prefix that
/// the current hash carries.
/// </summary>
/// <param name="candidate">Hash supplied by the caller.</param>
/// <param name="currentHash">Hash of the policy as it stands now.</param>
/// <returns><c>true</c> when the two refer to the same hash value.</returns>
private static bool HashesMatch(string candidate, string currentHash)
{
    const string prefix = "sha256:";
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

    // Either a full match, or the candidate equals the current hash with
    // its "sha256:" prefix stripped.
    return string.Equals(candidate, currentHash, ignoreCase)
        || (currentHash.StartsWith(prefix, ignoreCase)
            && string.Equals(candidate, currentHash.Substring(prefix.Length), ignoreCase));
}
|
||||
}
|
||||
|
||||
public class SystemNotifyArgs : EventArgs
|
||||
|
||||
@ -1,119 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Capabilities;
|
||||
|
||||
/// <summary>
/// Node capability exposing the single <c>tts.speak</c> command. The
/// capability validates the request and delegates the actual speech
/// synthesis to whoever subscribes to <see cref="SpeakRequested"/>.
/// </summary>
public sealed class TtsCapability : NodeCapabilityBase
{
    public const string SpeakCommand = "tts.speak";
    public const string WindowsProvider = "windows";
    public const string ElevenLabsProvider = "elevenlabs";

    /// <summary>
    /// Local neural TTS via Sherpa-ONNX wrapping Piper voices. No network
    /// egress; voice models download once to %LOCALAPPDATA%.
    /// </summary>
    public const string PiperProvider = "piper";

    /// <summary>Largest accepted <c>text</c> length (after trimming), in characters.</summary>
    public const int MaxTextLength = 5000;

    private static readonly string[] _commands = new[] { SpeakCommand };

    public override string Category => "tts";

    public override IReadOnlyList<string> Commands => _commands;

    /// <summary>Handler that performs the speech synthesis for a validated request.</summary>
    public event Func<TtsSpeakArgs, CancellationToken, Task<TtsSpeakResult>>? SpeakRequested;

    public TtsCapability(IOpenClawLogger logger)
        : base(logger)
    {
    }

    /// <summary>
    /// Picks the provider name to use: the requested one when non-blank,
    /// otherwise the configured one, otherwise <see cref="PiperProvider"/>.
    /// The chosen name is trimmed and lower-cased.
    /// </summary>
    public static string ResolveProvider(string? requestedProvider, string? configuredProvider)
    {
        var chosen = !string.IsNullOrWhiteSpace(requestedProvider)
            ? requestedProvider
            : configuredProvider;

        if (string.IsNullOrWhiteSpace(chosen))
        {
            return PiperProvider;
        }

        return chosen.Trim().ToLowerInvariant();
    }

    /// <summary>Runs the request without a cancellation token.</summary>
    public override Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request)
    {
        return ExecuteAsync(request, CancellationToken.None);
    }

    /// <summary>
    /// Validates a <c>tts.speak</c> request and forwards it to
    /// <see cref="SpeakRequested"/>.
    /// </summary>
    /// <param name="request">Invoke request; reads <c>text</c>, <c>provider</c>, <c>voiceId</c>, <c>model</c> and <c>interrupt</c>.</param>
    /// <param name="cancellationToken">Token passed through to the handler.</param>
    /// <returns>A success response describing what was spoken, or a fixed-text error.</returns>
    public override async Task<NodeInvokeResponse> ExecuteAsync(
        NodeInvokeRequest request,
        CancellationToken cancellationToken)
    {
        var isSpeak = string.Equals(request.Command, SpeakCommand, StringComparison.Ordinal);
        if (!isSpeak)
        {
            return Error($"Unknown command: {request.Command}");
        }

        var text = GetStringArg(request.Args, "text")?.Trim();
        if (string.IsNullOrWhiteSpace(text))
        {
            return Error("Missing required text");
        }

        if (text.Length > MaxTextLength)
        {
            return Error($"TTS text exceeds {MaxTextLength} characters.");
        }

        var handler = SpeakRequested;
        if (handler is null)
        {
            return Error("TTS speak not available");
        }

        // Optional string args collapse to null when blank so the handler
        // can tell "not specified" apart from a real value.
        var args = new TtsSpeakArgs
        {
            Text = text,
            Provider = NormalizeOptional(GetStringArg(request.Args, "provider")),
            VoiceId = NormalizeOptional(GetStringArg(request.Args, "voiceId")),
            Model = NormalizeOptional(GetStringArg(request.Args, "model")),
            Interrupt = GetBoolArg(request.Args, "interrupt")
        };

        // Only the character count is logged, never the text itself.
        Logger.Info($"tts.speak: provider={args.Provider ?? "(default)"}, chars={args.Text.Length}, interrupt={args.Interrupt}");

        try
        {
            var outcome = await handler(args, cancellationToken).ConfigureAwait(false);
            var payload = new
            {
                spoken = outcome.Spoken,
                provider = outcome.Provider,
                contentType = outcome.ContentType,
                durationMs = outcome.DurationMs
            };
            return Success(payload);
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            return Error("Speak canceled");
        }
        catch (Exception ex)
        {
            // Privacy: never echo raw exception text into the response. The
            // exception flows through the failed-invoke path and may be
            // persisted to recent activity / support bundles. ElevenLabs
            // error messages can contain key prefixes; OS speech errors
            // can contain device names. Full detail stays in the local
            // log only. (Same pattern as SttCapability.)
            Logger.Error("TTS speak failed", ex);
            return Error("Speak failed");
        }
    }

    /// <summary>Returns the trimmed value, or null when the value is null or blank.</summary>
    private static string? NormalizeOptional(string? value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return null;
        }

        return value.Trim();
    }
}
|
||||
|
||||
/// <summary>
/// Validated arguments of a <c>tts.speak</c> request as handed to the
/// speak handler.
/// </summary>
public sealed class TtsSpeakArgs
{
    /// <summary>Text to speak; empty by default.</summary>
    public string Text { get; set; } = string.Empty;

    /// <summary>Requested provider name, or null when the request did not name one.</summary>
    public string? Provider { get; set; }

    /// <summary>Requested voice id, or null when not specified.</summary>
    public string? VoiceId { get; set; }

    /// <summary>Requested model, or null when not specified.</summary>
    public string? Model { get; set; }

    /// <summary>Value of the request's <c>interrupt</c> flag.</summary>
    public bool Interrupt { get; set; }
}
|
||||
|
||||
public sealed class TtsSpeakResult
|
||||
{
|
||||
public bool Spoken { get; set; } = true;
|
||||
public string Provider { get; set; } = TtsCapability.WindowsProvider;
|
||||
public string? ContentType { get; set; }
|
||||
public int? DurationMs { get; set; }
|
||||
}
|
||||
@ -20,13 +20,10 @@ public static class DeepLinkParser
|
||||
if (!uri.StartsWith(Scheme, StringComparison.OrdinalIgnoreCase))
|
||||
return null;
|
||||
|
||||
var remainder = uri[Scheme.Length..];
|
||||
var remainder = uri[Scheme.Length..].TrimEnd('/');
|
||||
var queryIndex = remainder.IndexOf('?');
|
||||
var query = queryIndex >= 0 ? remainder[(queryIndex + 1)..] : "";
|
||||
// Trim trailing slash AFTER splitting off the query so the
|
||||
// Windows-canonicalized form `openclaw://send/?args=...` (slash
|
||||
// BEFORE the `?`) yields path "send", not "send/".
|
||||
var path = (queryIndex >= 0 ? remainder[..queryIndex] : remainder).TrimEnd('/');
|
||||
var path = queryIndex >= 0 ? remainder[..queryIndex] : remainder;
|
||||
|
||||
var parameters = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
|
||||
foreach (var part in query.Split('&', StringSplitOptions.RemoveEmptyEntries))
|
||||
|
||||
@ -1,11 +1,8 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using OpenClaw.Shared.Mcp;
|
||||
using NSec.Cryptography;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
@ -21,66 +18,12 @@ public class DeviceIdentity
|
||||
private PublicKey? _publicKey;
|
||||
private string? _deviceId;
|
||||
private string? _deviceToken;
|
||||
private string[]? _deviceTokenScopes;
|
||||
private string? _nodeDeviceToken;
|
||||
private string[]? _nodeDeviceTokenScopes;
|
||||
|
||||
private static readonly SignatureAlgorithm Ed25519Algorithm = SignatureAlgorithm.Ed25519;
|
||||
|
||||
public string DeviceId => _deviceId ?? throw new InvalidOperationException("Device not initialized");
|
||||
public string PublicKeyBase64Url => _publicKey != null ? Base64UrlEncode(_publicKey.Export(KeyBlobFormat.RawPublicKey)) : throw new InvalidOperationException("Device not initialized");
|
||||
public string? DeviceToken => _deviceToken;
|
||||
public IReadOnlyList<string>? DeviceTokenScopes => _deviceTokenScopes;
|
||||
public string? NodeDeviceToken => _nodeDeviceToken;
|
||||
public IReadOnlyList<string>? NodeDeviceTokenScopes => _nodeDeviceTokenScopes;
|
||||
|
||||
public static string? TryReadStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null) =>
|
||||
TryReadStoredDeviceTokenForRole(dataPath, "operator", logger);
|
||||
|
||||
/// <summary>
/// Reads the persisted device token for the given role straight from the
/// key file, without constructing a <see cref="DeviceIdentity"/>.
/// </summary>
/// <param name="dataPath">Directory that holds <c>device-key-ed25519.json</c>.</param>
/// <param name="role">Token role; must be <c>operator</c> or <c>node</c>.</param>
/// <param name="logger">Optional logger that receives a warning when the file cannot be read or parsed.</param>
/// <returns>
/// The stored token, or <c>null</c> when the file is missing, unreadable,
/// not a JSON object, or holds no non-blank string token for the role.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">The role is neither <c>operator</c> nor <c>node</c>.</exception>
public static string? TryReadStoredDeviceTokenForRole(string dataPath, string role, IOpenClawLogger? logger = null)
{
    var tokenRole = ParseDeviceTokenRole(role);
    var keyPath = Path.Combine(dataPath, "device-key-ed25519.json");
    if (!File.Exists(keyPath))
        return null;

    try
    {
        using var doc = JsonDocument.Parse(File.ReadAllText(keyPath));
        var root = doc.RootElement;

        // TryGetProperty throws InvalidOperationException on a non-object root
        // (e.g. a file containing "[]" or "null"); a Try* method must not let
        // a malformed key file escape as an exception.
        if (root.ValueKind != JsonValueKind.Object)
        {
            logger?.Warn("Failed to read stored device token: key file root is not a JSON object");
            return null;
        }

        var tokenPropertyName = tokenRole == DeviceTokenRole.Node
            ? nameof(DeviceKeyData.NodeDeviceToken)
            : nameof(DeviceKeyData.DeviceToken);

        if (root.TryGetProperty(tokenPropertyName, out var deviceToken) &&
            deviceToken.ValueKind == JsonValueKind.String)
        {
            var value = deviceToken.GetString();
            return string.IsNullOrWhiteSpace(value) ? null : value;
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException or JsonException)
    {
        // Expected read/parse failures are reported and treated as "no token".
        logger?.Warn($"Failed to read stored device token: {ex.Message}");
    }

    return null;
}
|
||||
|
||||
public static bool HasStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null) =>
|
||||
!string.IsNullOrWhiteSpace(TryReadStoredDeviceToken(dataPath, logger));
|
||||
|
||||
public static bool HasStoredDeviceTokenForRole(string dataPath, string role, IOpenClawLogger? logger = null) =>
|
||||
!string.IsNullOrWhiteSpace(TryReadStoredDeviceTokenForRole(dataPath, role, logger));
|
||||
|
||||
public DeviceIdentity(string dataPath, IOpenClawLogger? logger = null)
|
||||
{
|
||||
@ -122,9 +65,6 @@ public class DeviceIdentity
|
||||
_publicKey = _privateKey.PublicKey;
|
||||
_deviceId = data.DeviceId;
|
||||
_deviceToken = data.DeviceToken;
|
||||
_deviceTokenScopes = NormalizeScopes(data.DeviceTokenScopes);
|
||||
_nodeDeviceToken = data.NodeDeviceToken;
|
||||
_nodeDeviceTokenScopes = NormalizeScopes(data.NodeDeviceTokenScopes);
|
||||
|
||||
_logger.Info($"Loaded Ed25519 device identity: {_deviceId?[..16]}...");
|
||||
}
|
||||
@ -169,11 +109,8 @@ public class DeviceIdentity
|
||||
{
|
||||
Directory.CreateDirectory(dir);
|
||||
}
|
||||
if (!string.IsNullOrEmpty(dir))
|
||||
McpAuthToken.TryRestrictDataDirectoryAcl(dir);
|
||||
|
||||
File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));
|
||||
McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
|
||||
_logger.Info($"Generated new Ed25519 device identity: {_deviceId}");
|
||||
}
|
||||
|
||||
@ -333,40 +270,7 @@ public class DeviceIdentity
|
||||
/// </summary>
|
||||
public void StoreDeviceToken(string token)
|
||||
{
|
||||
StoreDeviceTokenCore(token, null);
|
||||
}
|
||||
|
||||
public void StoreDeviceTokenWithScopes(string token, IEnumerable<string>? scopes)
|
||||
{
|
||||
StoreDeviceTokenCore(token, NormalizeScopes(scopes));
|
||||
}
|
||||
|
||||
/// <summary>
/// Stores a device token, together with its normalized scopes, in the
/// slot that belongs to the given role.
/// </summary>
/// <param name="role">Token role; must be <c>operator</c> or <c>node</c>.</param>
/// <param name="token">Token to store.</param>
/// <param name="scopes">Optional scopes; blank entries and duplicates are dropped.</param>
/// <exception cref="ArgumentOutOfRangeException">The role is neither <c>operator</c> nor <c>node</c>.</exception>
public void StoreDeviceTokenForRole(string role, string token, IEnumerable<string>? scopes = null)
{
    // Validate the role before touching the scopes.
    var isNodeRole = ParseDeviceTokenRole(role) == DeviceTokenRole.Node;
    var normalizedScopes = NormalizeScopes(scopes);

    if (isNodeRole)
    {
        StoreNodeDeviceTokenCore(token, normalizedScopes);
    }
    else
    {
        StoreDeviceTokenCore(token, normalizedScopes);
    }
}
|
||||
|
||||
/// <summary>
/// Maps the wire role name onto <see cref="DeviceTokenRole"/>. Matching is
/// exact and case-sensitive.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The role is neither <c>operator</c> nor <c>node</c>.</exception>
private static DeviceTokenRole ParseDeviceTokenRole(string role)
{
    if (role == "operator")
    {
        return DeviceTokenRole.Operator;
    }

    if (role == "node")
    {
        return DeviceTokenRole.Node;
    }

    throw new ArgumentOutOfRangeException(nameof(role), "Device token role must be 'operator' or 'node'.");
}
|
||||
|
||||
private void StoreDeviceTokenCore(string token, string[]? scopes)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(token))
|
||||
throw new ArgumentException("Device token cannot be empty.", nameof(token));
|
||||
|
||||
_deviceToken = token;
|
||||
_deviceTokenScopes = scopes;
|
||||
|
||||
// Update the key file with the token
|
||||
try
|
||||
@ -378,9 +282,7 @@ public class DeviceIdentity
|
||||
if (data != null)
|
||||
{
|
||||
data.DeviceToken = token;
|
||||
data.DeviceTokenScopes = scopes;
|
||||
File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));
|
||||
McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
|
||||
_logger.Info("Device token stored");
|
||||
}
|
||||
}
|
||||
@ -390,48 +292,6 @@ public class DeviceIdentity
|
||||
_logger.Error($"Failed to store device token: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Keeps the node-role device token and its scopes in memory and writes
/// them into the existing key file.
/// </summary>
/// <param name="token">Token to store; must not be blank.</param>
/// <param name="scopes">Already-normalized scopes, or null.</param>
/// <exception cref="ArgumentException">The token is null, empty or whitespace.</exception>
/// <remarks>
/// Persistence is best-effort: a missing key file is skipped and any I/O or
/// JSON failure is logged, leaving the in-memory values in place.
/// </remarks>
private void StoreNodeDeviceTokenCore(string token, string[]? scopes)
{
    if (string.IsNullOrWhiteSpace(token))
        throw new ArgumentException("Device token cannot be empty.", nameof(token));

    _nodeDeviceToken = token;
    _nodeDeviceTokenScopes = scopes;

    try
    {
        if (!File.Exists(_keyPath))
            return;

        var json = File.ReadAllText(_keyPath);
        var data = JsonSerializer.Deserialize<DeviceKeyData>(json);
        if (data == null)
            return;

        data.NodeDeviceToken = token;
        data.NodeDeviceTokenScopes = scopes;
        File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true }));

        // The file now holds another secret; re-apply the restrictive ACL
        // exactly as the operator-token path does after rewriting it.
        McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath);
        _logger.Info("Node device token stored");
    }
    catch (Exception ex)
    {
        _logger.Error($"Failed to store node device token: {ex.Message}");
    }
}
|
||||
|
||||
/// <summary>
/// Cleans up a scope list: blank entries are dropped, the rest are
/// trimmed, and duplicates (ordinal comparison) are removed while keeping
/// the first occurrence's position.
/// </summary>
/// <param name="scopes">Raw scopes, or null.</param>
/// <returns>The cleaned scopes, or <c>null</c> when the input is null or nothing remains.</returns>
private static string[]? NormalizeScopes(IEnumerable<string>? scopes)
{
    if (scopes is null)
    {
        return null;
    }

    var seen = new HashSet<string>(StringComparer.Ordinal);
    var ordered = new List<string>();
    foreach (var scope in scopes)
    {
        if (string.IsNullOrWhiteSpace(scope))
        {
            continue;
        }

        var trimmed = scope.Trim();
        if (seen.Add(trimmed))
        {
            ordered.Add(trimmed);
        }
    }

    return ordered.Count > 0 ? ordered.ToArray() : null;
}
|
||||
|
||||
private static string Base64UrlEncode(byte[] data)
|
||||
{
|
||||
@ -441,21 +301,12 @@ public class DeviceIdentity
|
||||
.TrimEnd('=');
|
||||
}
|
||||
|
||||
private enum DeviceTokenRole
|
||||
{
|
||||
Operator,
|
||||
Node
|
||||
}
|
||||
|
||||
// JSON shape of the key file: StoreNodeDeviceTokenCore reads this type from the key path
// with JsonSerializer, updates the node token fields, and writes it back.
// Property order is kept as-is because it determines the order of the serialized fields.
private class DeviceKeyData
{
    // NOTE(review): presumably the Base64-encoded key pair — confirm against the code that generates it.
    public string? PrivateKeyBase64 { get; set; }
    public string? PublicKeyBase64 { get; set; }
    public string? DeviceId { get; set; }
    // Device token and its scopes (the non-node counterpart of the pair below).
    public string? DeviceToken { get; set; }
    public string[]? DeviceTokenScopes { get; set; }
    // Node device token and its scopes; written by StoreNodeDeviceTokenCore.
    public string? NodeDeviceToken { get; set; }
    public string[]? NodeDeviceTokenScopes { get; set; }
    public string? Algorithm { get; set; }
    // NOTE(review): unit/epoch of this timestamp is not visible here — confirm before relying on it.
    public long CreatedAt { get; set; }
}
|
||||
|
||||
@ -2,8 +2,6 @@ using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
using System.Text.RegularExpressions;
|
||||
@ -36,7 +34,7 @@ public enum ExecApprovalAction
|
||||
{
|
||||
Allow,
|
||||
Deny,
|
||||
Prompt
|
||||
Prompt // Future: show user a confirmation dialog
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -199,13 +197,6 @@ public class ExecApprovalPolicy
|
||||
Rules = _rules.ToList()
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Fingerprints the current policy: serializes <c>GetPolicyData()</c> with the class's JSON
/// options, hashes the UTF-8 bytes with SHA-256, and formats the digest as lowercase hex.
/// </summary>
/// <returns>A string of the form <c>sha256:&lt;hex digest&gt;</c>.</returns>
public string GetPolicyHash()
{
    var serialized = JsonSerializer.Serialize(GetPolicyData(), _jsonOptions);
    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(serialized));
    var hex = Convert.ToHexString(digest).ToLowerInvariant();
    return $"sha256:{hex}";
}
|
||||
|
||||
/// <summary>
|
||||
/// Load policy from disk. Creates default policy if file doesn't exist.
|
||||
@ -249,8 +240,14 @@ public class ExecApprovalPolicy
|
||||
var dir = Path.GetDirectoryName(_policyFilePath);
|
||||
if (!string.IsNullOrEmpty(dir) && !Directory.Exists(dir))
|
||||
Directory.CreateDirectory(dir);
|
||||
|
||||
var json = JsonSerializer.Serialize(GetPolicyData(), _jsonOptions);
|
||||
|
||||
var data = new ExecPolicyData
|
||||
{
|
||||
DefaultAction = _defaultAction,
|
||||
Rules = _rules
|
||||
};
|
||||
|
||||
var json = JsonSerializer.Serialize(data, _jsonOptions);
|
||||
File.WriteAllText(_policyFilePath, json);
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
@ -1,40 +0,0 @@
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// The three outcomes an exec-approval prompt can produce; carried by
/// <see cref="ExecApprovalPromptDecision.Kind"/>.
/// </summary>
public enum ExecApprovalPromptDecisionKind
{
    /// <summary>The request was refused.</summary>
    Deny,
    /// <summary>The request was approved for this one invocation.</summary>
    AllowOnce,
    /// <summary>The request was approved with an "always allow" choice.</summary>
    AlwaysAllow
}
|
||||
|
||||
/// <summary>
/// Init-only payload handed to <see cref="IExecApprovalPromptHandler.RequestAsync"/>.
/// </summary>
public sealed class ExecApprovalPromptRequest
{
    /// <summary>Command text the prompt is about; defaults to the empty string.</summary>
    public string Command { get; init; } = "";
    /// <summary>Optional shell name. NOTE(review): presumably the shell that would run the command — confirm.</summary>
    public string? Shell { get; init; }
    /// <summary>Optional pattern. NOTE(review): presumably the policy rule that triggered the prompt — confirm.</summary>
    public string? MatchedPattern { get; init; }
    /// <summary>Reason text attached to the request; defaults to the empty string.</summary>
    public string Reason { get; init; } = "";
}
|
||||
|
||||
/// <summary>
/// Immutable result of an exec-approval prompt: a decision kind plus a reason string.
/// Instances are created only through the three static factories.
/// </summary>
public sealed class ExecApprovalPromptDecision
{
    /// <summary>Which of the three outcomes was chosen.</summary>
    public ExecApprovalPromptDecisionKind Kind { get; }

    /// <summary>Reason text recorded with the decision.</summary>
    public string Reason { get; }

    private ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind kind, string reason)
    {
        Kind = kind;
        Reason = reason;
    }

    /// <summary>Creates a <see cref="ExecApprovalPromptDecisionKind.Deny"/> decision.</summary>
    public static ExecApprovalPromptDecision Deny(string reason = "Denied by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.Deny, reason);
    }

    /// <summary>Creates a <see cref="ExecApprovalPromptDecisionKind.AllowOnce"/> decision.</summary>
    public static ExecApprovalPromptDecision AllowOnce(string reason = "Allowed once by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.AllowOnce, reason);
    }

    /// <summary>Creates a <see cref="ExecApprovalPromptDecisionKind.AlwaysAllow"/> decision.</summary>
    public static ExecApprovalPromptDecision AlwaysAllow(string reason = "Always allowed by user")
    {
        return new ExecApprovalPromptDecision(ExecApprovalPromptDecisionKind.AlwaysAllow, reason);
    }
}
|
||||
|
||||
/// <summary>
/// Abstraction over whatever component obtains an approval decision for a command.
/// </summary>
public interface IExecApprovalPromptHandler
{
    /// <summary>Asks for a decision on <paramref name="request"/>.</summary>
    /// <param name="request">Details of the command awaiting approval.</param>
    /// <param name="cancellationToken">Token the caller can use to cancel the request.</param>
    /// <returns>A task that completes with the decision.</returns>
    Task<ExecApprovalPromptDecision> RequestAsync(ExecApprovalPromptRequest request, CancellationToken cancellationToken = default);
}
|
||||
@ -1,70 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Architectural barrier produced by PR3.
// Equivalent to ExecHostValidatedRequest in the macOS reference, extended with resolution outputs.
// No module from PR4 onward may accept ValidatedRunRequest as direct input (research doc 05 line 439).
// Rail 15: a single canonical representation reused across evaluation, logging, prompting, execution.
/// <summary>
/// Read-only carrier for a normalized command: the argv, its display form, the executable
/// resolutions, and the request context. The only constructor is <c>internal</c>; it stores
/// its arguments as given, without copying or validating them.
/// </summary>
public sealed class CanonicalCommandIdentity
{
    // ── Normalization outputs ─────────────────────────────────────────────────

    // Argv exactly as produced by PR2 (no trimming; coding contract process-argv-semantics).
    public IReadOnlyList<string> Command { get; }

    // Canonical display form generated from argv. Never rawCommand from the agent.
    // Used by logging and prompting. Research doc 05 decision 2.
    public string DisplayCommand { get; }

    // Safe rawCommand for executable resolution. Null in Windows v1 (rawCommand not in
    // system.run protocol; research doc 05 OQ-V4 / decision 10).
    public string? EvaluationRawCommand { get; }

    // ── Resolution outputs ────────────────────────────────────────────────────

    // Singular resolution for the state machine (PR5).
    // Null if the primary executable cannot be determined.
    public ExecCommandResolution? Resolution { get; }

    // Per-segment resolutions for the allowlist matcher (PR4/PR5).
    // Empty list means fail-closed — no allowlist satisfaction possible.
    public IReadOnlyList<ExecCommandResolution> AllowlistResolutions { get; }

    // Suggested allowlist patterns for prompt/UI (PR6). Not a security decision.
    public IReadOnlyList<string> AllowAlwaysPatterns { get; }

    // ── Request context (carried from ValidatedRunRequest) ────────────────────

    public string? Cwd { get; }
    public int TimeoutMs { get; }
    public IReadOnlyDictionary<string, string>? Env { get; }
    public string? AgentId { get; }
    public string? SessionKey { get; }

    /// <summary>
    /// Assigns every argument to the property of the same name. The collections are kept by
    /// reference, so later mutation of the underlying instances is visible through this object.
    /// </summary>
    internal CanonicalCommandIdentity(
        IReadOnlyList<string> command,
        string displayCommand,
        string? evaluationRawCommand,
        ExecCommandResolution? resolution,
        IReadOnlyList<ExecCommandResolution> allowlistResolutions,
        IReadOnlyList<string> allowAlwaysPatterns,
        string? cwd,
        int timeoutMs,
        IReadOnlyDictionary<string, string>? env,
        string? agentId,
        string? sessionKey)
    {
        Command = command;
        DisplayCommand = displayCommand;
        EvaluationRawCommand = evaluationRawCommand;
        Resolution = resolution;
        AllowlistResolutions = allowlistResolutions;
        AllowAlwaysPatterns = allowAlwaysPatterns;
        Cwd = cwd;
        TimeoutMs = timeoutMs;
        Env = env;
        AgentId = agentId;
        SessionKey = sessionKey;
    }
}
|
||||
@ -1,137 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
/// Phase 1 of the V2 exec approval pipeline: structural input validation (rail 18, step 1).
/// Turns a raw NodeInvokeRequest into a ValidatedRunRequest, or reports a validation failure
/// with one of the reason codes below. No executable resolution, shell-wrapper detection, or
/// policy evaluation happens here.
/// </summary>
/// <remarks>
/// Fields are checked in a fixed order — command, cwd, env, timeout — and the first problem wins.
/// Reason codes: malformed-command, missing-command, empty-command, malformed-cwd, empty-cwd,
/// malformed-env, invalid-timeout.
/// </remarks>
public static class ExecApprovalV2InputValidator
{
    private const int DefaultTimeoutMs = 30_000;

    /// <summary>Validates the arguments of <paramref name="request"/>.</summary>
    /// <returns>An Ok outcome wrapping the validated request, or a Fail outcome with the reason.</returns>
    public static ExecApprovalV2ValidationOutcome Validate(NodeInvokeRequest request)
    {
        var args = request.Args;

        var argv = TryParseArgv(args, out bool malformedCommand);
        if (malformedCommand)
        {
            return Deny("malformed-command");
        }

        if (argv == null || argv.Length == 0)
        {
            return Deny("missing-command");
        }

        if (string.IsNullOrWhiteSpace(argv[0]))
        {
            return Deny("empty-command");
        }

        var cwdProblem = ReadCwd(args, out var cwd);
        if (cwdProblem is not null)
        {
            return Deny(cwdProblem);
        }

        var envProblem = ReadEnv(args, out var env);
        if (envProblem is not null)
        {
            return Deny(envProblem);
        }

        var timeoutProblem = ReadTimeout(args, out var timeoutMs);
        if (timeoutProblem is not null)
        {
            return Deny(timeoutProblem);
        }

        var validated = new ValidatedRunRequest(
            argv,
            TryGetString(args, "shell"),
            cwd,
            timeoutMs,
            env,
            TryGetString(args, "agentId"),
            TryGetString(args, "sessionKey"));
        return ExecApprovalV2ValidationOutcome.Ok(validated);
    }

    private static ExecApprovalV2ValidationOutcome Deny(string reason)
    {
        return ExecApprovalV2ValidationOutcome.Fail(ExecApprovalV2Result.ValidationFailed(reason));
    }

    // cwd is optional; a non-string value or an empty/whitespace string is rejected.
    // Returns the deny reason, or null when the field is absent or valid.
    private static string? ReadCwd(JsonElement args, out string? cwd)
    {
        cwd = null;
        if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("cwd", out var element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.String)
        {
            return "malformed-cwd";
        }

        var text = element.GetString();
        if (string.IsNullOrWhiteSpace(text))
        {
            return "empty-cwd";
        }

        cwd = text;
        return null;
    }

    // env is optional; when present it must be an object whose values are all strings.
    // Keys are stored case-insensitively, so a later duplicate overwrites an earlier one.
    private static string? ReadEnv(JsonElement args, out IReadOnlyDictionary<string, string>? env)
    {
        env = null;
        if (args.ValueKind != JsonValueKind.Object || !args.TryGetProperty("env", out var element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.Object)
        {
            return "malformed-env";
        }

        var variables = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (var entry in element.EnumerateObject())
        {
            if (entry.Value.ValueKind != JsonValueKind.String)
            {
                return "malformed-env";
            }

            variables[entry.Name] = entry.Value.GetString() ?? "";
        }

        env = variables;
        return null;
    }

    // timeoutMs / timeout — positive 32-bit integer; defaults to 30 000 when neither is present.
    // "timeoutMs" takes precedence: "timeout" is only consulted when "timeoutMs" is absent.
    // Upper-bound clamping (legacy safety limit) is enforced in the execution/policy phase, not here.
    private static string? ReadTimeout(JsonElement args, out int timeoutMs)
    {
        timeoutMs = DefaultTimeoutMs;
        if (args.ValueKind != JsonValueKind.Object)
        {
            return null;
        }

        if (!args.TryGetProperty("timeoutMs", out var element) &&
            !args.TryGetProperty("timeout", out element))
        {
            return null;
        }

        if (element.ValueKind != JsonValueKind.Number || !element.TryGetInt32(out var parsed) || parsed <= 0)
        {
            return "invalid-timeout";
        }

        timeoutMs = parsed;
        return null;
    }

    // Builds argv from "command", which may be an array of strings or a single string that is
    // optionally followed by a separate "args" array. Sets malformed when a value has the wrong
    // JSON type; returns null (without malformed) when no usable command is present.
    private static string[]? TryParseArgv(JsonElement args, out bool malformed)
    {
        malformed = false;
        if (args.ValueKind != JsonValueKind.Object ||
            !args.TryGetProperty("command", out var commandElement))
        {
            return null;
        }

        switch (commandElement.ValueKind)
        {
            case JsonValueKind.Array:
            {
                var tokens = new List<string>();
                if (!TryAppendStrings(commandElement, tokens))
                {
                    malformed = true;
                    return null;
                }

                return tokens.Count > 0 ? tokens.ToArray() : null;
            }

            case JsonValueKind.String:
            {
                var head = commandElement.GetString();
                if (string.IsNullOrWhiteSpace(head))
                {
                    return null;
                }

                if (!args.TryGetProperty("args", out var extra))
                {
                    return new[] { head };
                }

                // A non-array "args" value is a protocol violation.
                if (extra.ValueKind != JsonValueKind.Array)
                {
                    malformed = true;
                    return null;
                }

                var tokens = new List<string> { head };
                if (!TryAppendStrings(extra, tokens))
                {
                    malformed = true;
                    return null;
                }

                return tokens.ToArray();
            }

            default:
                return null;
        }
    }

    // Appends every element of a JSON array to target; stops and returns false at the first
    // element that is not a JSON string.
    private static bool TryAppendStrings(JsonElement array, List<string> target)
    {
        foreach (var item in array.EnumerateArray())
        {
            if (item.ValueKind != JsonValueKind.String)
            {
                return false;
            }

            target.Add(item.GetString() ?? "");
        }

        return true;
    }

    // Returns the string value of args[key], or null when args is not an object, the key is
    // missing, or the value is not a JSON string.
    private static string? TryGetString(JsonElement args, string key)
    {
        if (args.ValueKind == JsonValueKind.Object &&
            args.TryGetProperty(key, out var element) &&
            element.ValueKind == JsonValueKind.String)
        {
            return element.GetString();
        }

        return null;
    }
}
|
||||
@ -1,85 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Result of normalization: a CanonicalCommandIdentity when IsResolved is true, otherwise a
// typed denial in Error. Produced by ExecApprovalV2Normalizer; consumed by the coordinator
// pipeline (PR7).
public sealed class ExecApprovalV2NormalizationOutcome
{
    private ExecApprovalV2NormalizationOutcome(
        bool isResolved,
        CanonicalCommandIdentity? identity,
        ExecApprovalV2Result? error)
    {
        IsResolved = isResolved;
        Identity = identity;
        Error = error;
    }

    // True for outcomes created by Ok, false for outcomes created by Fail.
    public bool IsResolved { get; }

    // Set by Ok; null on failures.
    public CanonicalCommandIdentity? Identity { get; }

    // Set by Fail; null on successes.
    public ExecApprovalV2Result? Error { get; }

    public static ExecApprovalV2NormalizationOutcome Ok(CanonicalCommandIdentity identity)
    {
        return new ExecApprovalV2NormalizationOutcome(true, identity, null);
    }

    public static ExecApprovalV2NormalizationOutcome Fail(ExecApprovalV2Result error)
    {
        return new ExecApprovalV2NormalizationOutcome(false, null, error);
    }
}
|
||||
|
||||
// Rail 18 steps 2-4: normalize command form → resolve executable → build canonical identity.
// Holds no state of its own; every input comes from the request.
public static class ExecApprovalV2Normalizer
{
    // Builds the CanonicalCommandIdentity for a validated request, or fails with
    // "executable-resolution-failed" when neither the singular resolution nor any
    // allowlist resolution could be produced.
    public static ExecApprovalV2NormalizationOutcome Normalize(ValidatedRunRequest request)
    {
        var argv = request.Argv;
        var workingDirectory = request.Cwd;
        var environment = request.Env as IReadOnlyDictionary<string, string>;

        // The display form always comes from argv, never from rawCommand (research doc 05 decision 2).
        var display = ShellQuoting.FormatExecCommand(argv);

        // Windows v1 has no rawCommand (system.run does not carry it; research doc 05 OQ-V4),
        // so the evaluation raw command is deliberately left null.
        string? rawForEvaluation = null;

        // One resolution for the state machine ...
        var primary = ExecCommandResolver.Resolve(argv, workingDirectory, environment);

        // ... one per chain segment for the allowlist. An empty list is fail-closed
        // (research doc 04 R2) but is not a denial by itself — the evaluator decides.
        var perSegment = ExecCommandResolver.ResolveForAllowlist(
            argv, rawForEvaluation, workingDirectory, environment);

        // ... and UX-only suggestions for the prompt.
        var suggestions = ExecCommandResolver.ResolveAllowAlwaysPatterns(
            argv, workingDirectory, environment);

        // Rail 6: nothing resolvable at all → typed deny rather than a silent allow.
        if (primary is null && perSegment.Count == 0)
        {
            return ExecApprovalV2NormalizationOutcome.Fail(
                ExecApprovalV2Result.ResolutionFailed("executable-resolution-failed"));
        }

        return ExecApprovalV2NormalizationOutcome.Ok(new CanonicalCommandIdentity(
            command: argv,
            displayCommand: display,
            evaluationRawCommand: rawForEvaluation,
            resolution: primary,
            allowlistResolutions: perSegment,
            allowAlwaysPatterns: suggestions,
            cwd: workingDirectory,
            timeoutMs: request.TimeoutMs,
            env: environment,
            agentId: request.AgentId,
            sessionKey: request.SessionKey));
    }
}
|
||||
@ -1,501 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Resolved identity of a single executable token.
// Shape mirrors macOS ExecCommandResolution struct.
/// <summary>Outcome of resolving one executable token, as built by ExecCommandResolver.</summary>
/// <param name="RawExecutable">The token after tilde expansion.</param>
/// <param name="ResolvedPath">Full path of the executable; null when a bare name was not found on the search path.</param>
/// <param name="ExecutableName">File name of <paramref name="ResolvedPath"/>, or the expanded token when there is no resolved path.</param>
/// <param name="Cwd">The working directory that was passed to the resolver, unchanged.</param>
public readonly record struct ExecCommandResolution(
    string RawExecutable,
    string? ResolvedPath,
    string ExecutableName,
    string? Cwd);
|
||||
|
||||
// The three resolution functions required by the pipeline.
|
||||
//   Resolve()                    → singular, for state machine
|
||||
// ResolveForAllowlist() → multi-segment, fail-closed, for allowlist matching
|
||||
// ResolveAllowAlwaysPatterns() → UX suggestions for prompt
|
||||
internal static class ExecCommandResolver
|
||||
{
|
||||
// Windows executable extensions, tried in order for basename search.
|
||||
private static readonly string[] s_extensions = [".exe", ".cmd", ".bat", ".com"];
|
||||
|
||||
// ── Public API ───────────────────────────────────────────────────────────
|
||||
|
||||
// Singular resolution of the primary executable, used by the state machine.
// The command first goes through ExecEnvInvocationUnwrapper.UnwrapForResolution; the first
// token of the result (trimmed) is what gets resolved.
// Returns null when nothing is left after unwrapping, when that token is blank, or when
// ResolveExecutable gives up.
internal static ExecCommandResolution? Resolve(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count == 0)
    {
        return null;
    }

    var executable = unwrapped[0].Trim();
    if (executable.Length == 0)
    {
        return null;
    }

    return ResolveExecutable(executable, cwd, env);
}
|
||||
|
||||
// Multi-segment resolution for allowlist matching.
// For a shell wrapper, the inline payload is split into chain segments and one executable is
// resolved per segment; for a direct exec, a single resolution is returned.
// An empty list is the fail-closed answer. It is returned for: an env invocation with modifiers,
// a wrapper without an inline command, a payload that cannot be split, a segment without a
// first token, PowerShell -EncodedCommand (in a segment or as a direct exec), or any
// resolution that fails.
internal static IReadOnlyList<ExecCommandResolution> ResolveForAllowlist(
    IReadOnlyList<string> command,
    string? evaluationRawCommand,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    // env with flags or VAR=val assignments: the resolver would look things up under the
    // original environment while execution uses the modified one, so the allowlist cannot
    // vouch for what actually runs (Hanselman review finding #2).
    var envWithModifiers = command.Count > 0
        && ExecCommandToken.IsEnv(command[0].Trim())
        && ExecEnvInvocationUnwrapper.HasModifiers(command);
    if (envWithModifiers)
    {
        return [];
    }

    var wrapper = ExecShellWrapperNormalizer.Extract(command);
    if (!wrapper.IsWrapper)
    {
        // Direct exec of powershell/pwsh with -EncodedCommand, including behind a transparent
        // env prefix such as ["env", "pwsh", "-enc", ...].
        if (DirectExecUsesEncodedCommand(command))
        {
            return [];
        }

        return ResolveSingle(command, evaluationRawCommand, cwd, env) is { } direct
            ? [direct]
            : [];
    }

    if (wrapper.InlineCommand is not { } inline)
    {
        return [];
    }

    if (SplitShellCommandChain(inline) is not { } segments)
    {
        return [];
    }

    var resolved = new List<ExecCommandResolution>(segments.Count);
    foreach (var segment in segments)
    {
        if (ParseFirstToken(segment) is not { } head)
        {
            return [];
        }

        // -EncodedCommand and aliases in segment position: fail-closed (research doc 04 S1).
        if (SegmentUsesEncodedCommand(segment, head))
        {
            return [];
        }

        if (ResolveExecutable(head, cwd, env) is not { } hit)
        {
            return [];
        }

        resolved.Add(hit);
    }

    return resolved;
}
|
||||
|
||||
// Allowlist pattern suggestions shown in the prompt (UX only).
// Unlike ResolveForAllowlist, env invocations with modifiers are still unwrapped here so the
// real executable can be surfaced. Patterns are de-duplicated case-insensitively and returned
// in discovery order.
internal static IReadOnlyList<string> ResolveAllowAlwaysPatterns(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var suggestions = new List<string>();
    var alreadySuggested = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    CollectPatterns(command, cwd, env, alreadySuggested, suggestions, depth: 0);
    return suggestions;
}
|
||||
|
||||
// ── Resolution helpers ───────────────────────────────────────────────────
|
||||
|
||||
// Resolves one executable for a direct (non-wrapper) command.
// When a non-blank rawCommand is supplied and its first token can be parsed, that token is
// resolved; otherwise resolution falls back to the argv via Resolve.
private static ExecCommandResolution? ResolveSingle(
    IReadOnlyList<string> command,
    string? rawCommand,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    var rawToken = string.IsNullOrWhiteSpace(rawCommand) ? null : ParseFirstToken(rawCommand);
    return rawToken is null
        ? Resolve(command, cwd, env)
        : ResolveExecutable(rawToken, cwd, env);
}
|
||||
|
||||
// Resolves a single executable token to an ExecCommandResolution.
//   - Tokens containing a path separator are turned into a full path (relative ones against
//     cwd, or the process directory when cwd is blank). Existence is not checked on this branch.
//   - Bare names are looked up on the search path using the PATH/PATHEXT values from env,
//     falling back to the process environment; ResolvedPath is null when nothing is found.
// Returns null (fail-closed) when the token contains a ':' that is not acceptable for its form,
// or when any step throws.
private static ExecCommandResolution? ResolveExecutable(
    string rawExecutable,
    string? cwd,
    IReadOnlyDictionary<string, string>? env)
{
    try
    {
        var expanded = ExpandTilde(rawExecutable);
        var hasSep = expanded.Contains('/') || expanded.Contains('\\');

        string? resolvedPath;
        if (hasSep)
        {
            // Reject paths with ':' in non-volume-separator positions (ADS, non-standard forms).
            if (HasNonStandardColon(expanded)) return null;

            resolvedPath = Path.IsPathFullyQualified(expanded)
                ? Path.GetFullPath(expanded)
                : Path.GetFullPath(expanded, string.IsNullOrWhiteSpace(cwd)
                    ? Directory.GetCurrentDirectory()
                    : cwd.Trim());
        }
        else
        {
            // A bare executable name never legitimately contains ':'. With one it is either a
            // drive-relative path ("D:tool") — which Path.Combine treats as rooted, silently
            // discarding the search directory — or an alternate-data-stream reference
            // ("tool.exe:stream"). Both would escape the PATH lookup below, so fail closed,
            // matching the colon rejection applied to separator-bearing paths above.
            if (expanded.Contains(':')) return null;

            resolvedPath = FindInPath(expanded, GetSearchPaths(env), GetPathExtensions(env));
        }

        var name = resolvedPath is not null ? Path.GetFileName(resolvedPath) : expanded;
        return new ExecCommandResolution(expanded, resolvedPath, name, cwd);
    }
    catch { return null; } // fail-closed; intentionally broad — add diagnostic tracing here if needed
}
|
||||
|
||||
// ── Shell command chain splitting ────────────────────────────────────────
|
||||
|
||||
// Splits a shell command string on ;, &&, ||, |, |&, &, \n (see DelimiterStep for the exact rules).
// Returns null (fail-closed) on command/process substitution: $(...), `...`, <(...), >(...).
// Returns null on unclosed quotes, a trailing unconsumed backslash, blank input, or any empty
// segment (for example a leading, trailing, or doubled delimiter).
// Segments are returned trimmed, with their quotes and backslashes left in place.
//
// NOTE(review): the quoting model here is POSIX-like — backslash escapes the next character
// outside single quotes, and both ' and " open quoted regions in which delimiters are ignored.
// cmd.exe and PowerShell do not share all of these rules (e.g. backslash is a plain path
// character there), so a delimiter this method considers escaped or quoted may still be live
// for those shells. Confirm which shells ExecShellWrapperNormalizer routes through this method.
private static IReadOnlyList<string>? SplitShellCommandChain(string command)
{
    var trimmed = command.Trim();
    if (trimmed.Length == 0) return null;

    var segments = new List<string>();
    var current = new StringBuilder();
    bool inSingle = false, inDouble = false, escaped = false;
    var chars = trimmed.ToCharArray();

    for (var i = 0; i < chars.Length; i++)
    {
        var ch = chars[i];
        char? next = i + 1 < chars.Length ? chars[i + 1] : null;

        // Order matters: a pending escape is consumed before the character is looked at,
        // then backslash, then the two quote toggles.
        if (escaped) { current.Append(ch); escaped = false; continue; }
        if (ch == '\\' && !inSingle) { current.Append(ch); escaped = true; continue; }
        if (ch == '\'' && !inDouble) { inSingle = !inSingle; current.Append(ch); continue; }
        if (ch == '"' && !inSingle) { inDouble = !inDouble; current.Append(ch); continue; }

        // Fail-closed on command/process substitution.
        if (!inSingle && IsCommandSubstitution(ch, next, inDouble)) return null;

        // Delimiters only count outside both kinds of quotes.
        if (!inSingle && !inDouble)
        {
            var step = DelimiterStep(ch, i > 0 ? chars[i - 1] : (char?)null, next);
            if (step.HasValue)
            {
                var seg = current.ToString().Trim();
                if (seg.Length == 0) return null;
                segments.Add(seg);
                current.Clear();
                // Skip the rest of a two-character delimiter; the loop's i++ covers the first one.
                i += step.Value - 1;
                continue;
            }
        }

        current.Append(ch);
    }

    if (escaped || inSingle || inDouble) return null;

    var last = current.ToString().Trim();
    if (last.Length == 0) return null;
    segments.Add(last);
    return segments;
}
|
||||
|
||||
// True when ch (with its successor next) starts a command or process substitution:
// a backtick or "$(" anywhere, plus "<(" and ">(" when not inside double quotes.
private static bool IsCommandSubstitution(char ch, char? next, bool inDouble)
{
    if (ch == '`')
    {
        return true;
    }

    // Every remaining form is a two-character sequence ending in '('.
    if (next != '(')
    {
        return false;
    }

    return ch switch
    {
        '$' => true,
        '<' or '>' => !inDouble,
        _ => false,
    };
}
|
||||
|
||||
// Classifies ch as a chain delimiter. Returns how many characters the delimiter spans
// (1 or 2), or null when ch is not a delimiter.
//   ;  \n          → 1
//   &&             → 2;  a lone & → 1, unless it touches '>' on either side (&> and >& are not delimiters)
//   ||  |&         → 2;  a lone | → 1
private static int? DelimiterStep(char ch, char? prev, char? next)
{
    switch (ch)
    {
        case ';':
        case '\n':
            return 1;

        case '&':
            if (next == '&')
            {
                return 2;
            }

            if (prev == '>' || next == '>')
            {
                return null;
            }

            return 1;

        case '|':
            return next is '|' or '&' ? 2 : 1;

        default:
            return null;
    }
}
|
||||
|
||||
// Returns the first word of a command string, or null when there is none.
//   - Unquoted: everything up to the first space or tab.
//   - Quoted ('...' or "..."): the text between the quotes, plus any characters glued to the
//     closing quote up to the next space or tab, so `"git".exe` yields git.exe.
// An unclosed or empty leading quote yields null rather than a guess.
private static string? ParseFirstToken(string command)
{
    var text = command.Trim();
    if (text.Length == 0)
    {
        return null;
    }

    char[] blanks = { ' ', '\t' };
    var opener = text[0];

    if (opener != '"' && opener != '\'')
    {
        var cut = text.IndexOfAny(blanks);
        return cut < 0 ? text : text.Substring(0, cut);
    }

    var closer = text.IndexOf(opener, 1);
    if (closer < 0)
    {
        return null; // unclosed quote — fail-closed
    }

    if (closer == 1)
    {
        return null; // empty quoted token
    }

    var inner = text.Substring(1, closer - 1);

    // Keep whatever is attached directly after the closing quote.
    var tailStart = closer + 1;
    var tailEnd = text.IndexOfAny(blanks, tailStart);
    var tail = tailEnd < 0
        ? text.Substring(tailStart)
        : text.Substring(tailStart, tailEnd - tailStart);

    return inner + tail;
}
|
||||
|
||||
// ── allowAlwaysPatterns collection ───────────────────────────────────────
|
||||
|
||||
// Adds allowlist pattern suggestions for command to patterns, using seen to skip duplicates.
// A pattern is the resolved path when there is one, otherwise the raw executable token.
// For a shell wrapper with an inline command, every chain segment contributes its first token;
// segments that cannot be tokenized or resolved are skipped (this is UX only, so it does not
// fail closed on -EncodedCommand). For a direct exec, the env-unwrapped first token is used.
// Does nothing when depth is 3 or more, or when command is empty.
private static void CollectPatterns(
    IReadOnlyList<string> command,
    string? cwd,
    IReadOnlyDictionary<string, string>? env,
    HashSet<string> seen,
    List<string> patterns,
    int depth)
{
    if (depth >= 3 || command.Count == 0)
    {
        return;
    }

    var wrapper = ExecShellWrapperNormalizer.Extract(command);
    if (wrapper.IsWrapper && wrapper.InlineCommand is { } inline)
    {
        if (SplitShellCommandChain(inline) is not { } segments)
        {
            return;
        }

        foreach (var segment in segments)
        {
            if (ParseFirstToken(segment) is { } head)
            {
                Suggest(head);
            }
        }

        return;
    }

    // Direct exec: unwrap env (modifier forms included) to reach the real executable.
    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count == 0)
    {
        return;
    }

    var executable = unwrapped[0].Trim();
    if (executable.Length > 0)
    {
        Suggest(executable);
    }

    // Resolves one token and records its pattern if it has not been suggested yet.
    void Suggest(string token)
    {
        if (ResolveExecutable(token, cwd, env) is not { } resolved)
        {
            return;
        }

        var pattern = resolved.ResolvedPath ?? resolved.RawExecutable;
        if (seen.Add(pattern))
        {
            patterns.Add(pattern);
        }
    }
}
|
||||
|
||||
// ── -EncodedCommand detection ─────────────────────────────────────────────
|
||||
|
||||
// Research doc 04 S1: a chain segment that runs PowerShell with -EncodedCommand (or an alias /
// prefix abbreviation) carries an opaque base64 payload, so the caller must fail closed.
// Returns true only when firstToken's normalized basename is "powershell" or "pwsh" AND some
// whitespace-separated word of the segment is recognized by IsEncodedCommandFlag.
// A quoted run counts as one word (so `"-enc"` is seen); scanning stops at a bare "--".
// `powershell -c 'Get-Date'` (no -enc) therefore returns false.
private static bool SegmentUsesEncodedCommand(string segment, string firstToken)
{
    var shell = ExecCommandToken.NormalizedBasename(firstToken);
    if (shell != "powershell" && shell != "pwsh")
    {
        return false;
    }

    var pos = 0;
    while (true)
    {
        while (pos < segment.Length && char.IsWhiteSpace(segment[pos]))
        {
            pos++;
        }

        if (pos >= segment.Length)
        {
            return false;
        }

        var start = pos;
        var lead = segment[pos];
        if (lead == '"' || lead == '\'')
        {
            // Quoted word: run to the matching quote and include it when present.
            pos++;
            while (pos < segment.Length && segment[pos] != lead)
            {
                pos++;
            }

            if (pos < segment.Length)
            {
                pos++;
            }
        }
        else
        {
            while (pos < segment.Length && !char.IsWhiteSpace(segment[pos]))
            {
                pos++;
            }
        }

        var word = segment.Substring(start, pos - start);
        if (IsEncodedCommandFlag(word))
        {
            return true;
        }

        if (word == "--")
        {
            return false;
        }
    }
}
|
||||
|
||||
// Returns true when a raw flag token (possibly quoted, possibly with a :value or =value suffix)
// spells -EncodedCommand, its "ec" alias, or any prefix abbreviation of "encodedcommand"
// (-e, -en, -enc, -encod, ...). Matching is case-insensitive.
//
// The switch prefix is matched the way PowerShell's own command-line parser reads it: besides
// the ASCII '-', it accepts '/', the en dash (U+2013), em dash (U+2014) and horizontal bar
// (U+2015), and it treats a doubled dash ("--enc") like a single one. Recognizing only '-'
// would let `powershell /enc <base64>` or `pwsh --enc <base64>` slip past this fail-closed check.
//
// Covers: "-EncodedCommand", "-enc", "-ec", "-e", `"-enc"`, `-enc:payload`, "/enc", "--enc", etc.
private static bool IsEncodedCommandFlag(string rawToken)
{
    var t = rawToken;
    if (t.Length >= 2 && t[0] is '"' or '\'' && t[^1] == t[0])
        t = t[1..^1]; // strip matching outer quotes
    if (t.Length == 0) return false;

    var lead = t[0];
    var leadIsDash = lead is '-' or '\u2013' or '\u2014' or '\u2015';
    if (!leadIsDash && lead != '/') return false;

    // Drop the prefix character, and a second identical dash if present.
    var name = t[1..];
    if (leadIsDash && name.Length > 0 && name[0] == lead)
        name = name[1..];

    // Strip trailing :value or =value (e.g. -EncodedCommand:base64).
    var sep = name.AsSpan().IndexOfAny('=', ':');
    if (sep >= 0) name = name[..sep];
    if (name.Length == 0) return false;

    name = name.ToLowerInvariant();
    // "ec" is a dedicated alias; everything else must be a prefix of the full switch name
    // ("e" is the shortest accepted abbreviation).
    return name == "ec" || "encodedcommand".StartsWith(name, StringComparison.Ordinal);
}
|
||||
|
||||
// True when a direct exec (no shell wrapper) runs powershell/pwsh with -EncodedCommand.
// The command is env-unwrapped first, so ["env", "pwsh", "-enc", ...] is caught too.
// Arguments are trimmed before inspection, and arguments from a bare "--" onward are ignored.
private static bool DirectExecUsesEncodedCommand(IReadOnlyList<string> command)
{
    var unwrapped = ExecEnvInvocationUnwrapper.UnwrapForResolution(command);
    if (unwrapped.Count < 2)
    {
        return false;
    }

    var shell = ExecCommandToken.NormalizedBasename(unwrapped[0].Trim());
    if (shell != "powershell" && shell != "pwsh")
    {
        return false;
    }

    return unwrapped
        .Skip(1)
        .Select(argument => argument.Trim())
        .TakeWhile(argument => argument != "--")
        .Any(IsEncodedCommandFlag);
}
|
||||
|
||||
// ── PATH search ───────────────────────────────────────────────────────────
|
||||
|
||||
// Looks up key in env using a case-insensitive ordinal key comparison, regardless of the
// dictionary's own comparer. Returns the first matching value in enumeration order, or null
// when env is null or has no such key.
private static string? GetEnvValueIgnoreCase(IReadOnlyDictionary<string, string>? env, string key)
{
    return env?
        .Where(pair => string.Equals(pair.Key, key, StringComparison.OrdinalIgnoreCase))
        .Select(pair => pair.Value)
        .FirstOrDefault();
}
|
||||
|
||||
// Searches the given directories, in order, for an executable called name.
// Within each directory the name is probed with every extension appended first, and only then
// as given — so a file without an extension cannot shadow "name.exe" in the same directory.
// The extensions are appended even when name already has one ("git.exe" is also tried as
// "git.exe.exe", ...); the bare-name probe is what finds such names.
// Returns the first existing candidate passed through TryNormalizePath, or null when no
// directory contains a match. Empty directory entries are skipped.
private static string? FindInPath(
    string name,
    IReadOnlyList<string> searchPaths,
    IReadOnlyList<string> extensions)
{
    foreach (var directory in searchPaths)
    {
        if (string.IsNullOrEmpty(directory))
        {
            continue;
        }

        var stem = Path.Combine(directory, name);
        var hit = extensions
            .Select(extension => stem + extension)
            .Append(stem)
            .FirstOrDefault(File.Exists);

        if (hit is not null)
        {
            return TryNormalizePath(hit);
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Produces the directory list used for executable lookup.
/// </summary>
/// <remarks>
/// Sources are consulted in order and the first one that yields at least one non-empty
/// entry wins: the PATH value in <paramref name="env"/> (key matched case-insensitively),
/// then the current process PATH, then <c>WellKnownPaths()</c>.
/// </remarks>
private static IReadOnlyList<string> GetSearchPaths(IReadOnlyDictionary<string, string>? env)
{
    // Splits a raw PATH value on the platform separator; null when it has no usable entry.
    static string[]? SplitEntries(string? raw)
    {
        if (string.IsNullOrEmpty(raw)) return null;
        var entries = raw.Split(Path.PathSeparator, StringSplitOptions.RemoveEmptyEntries);
        return entries.Length > 0 ? entries : null;
    }

    // `??` evaluates lazily, so the process PATH is only read when the request env has none.
    return SplitEntries(GetEnvValueIgnoreCase(env, "PATH"))
        ?? SplitEntries(Environment.GetEnvironmentVariable("PATH"))
        ?? WellKnownPaths();
}
|
||||
|
||||
/// <summary>
/// Produces the list of executable extensions appended during PATH lookup.
/// </summary>
/// <remarks>
/// Sources are consulted in order and the first one that yields at least one non-empty
/// entry wins: the PATHEXT value in <paramref name="env"/> (key matched
/// case-insensitively), then the current process PATHEXT, then the built-in
/// <c>s_extensions</c> default. Values are split on ';' and returned untrimmed.
/// </remarks>
private static IReadOnlyList<string> GetPathExtensions(IReadOnlyDictionary<string, string>? env)
{
    // Splits a raw PATHEXT value; null when it has no usable entry.
    static string[]? SplitEntries(string? raw)
    {
        if (string.IsNullOrEmpty(raw)) return null;
        var entries = raw.Split(';', StringSplitOptions.RemoveEmptyEntries);
        return entries.Length > 0 ? entries : null;
    }

    // `??` evaluates lazily, so the process value is only read when the request env has none.
    var configured = SplitEntries(GetEnvValueIgnoreCase(env, "PATHEXT"))
        ?? SplitEntries(Environment.GetEnvironmentVariable("PATHEXT"));
    if (configured is not null) return configured;

    return s_extensions;
}
|
||||
|
||||
/// <summary>
/// Last-resort search directories used when no PATH value is available.
/// </summary>
/// <returns>
/// Five entries, in order: <c>&lt;Windows&gt;\System32</c>, the system folder reported by
/// the runtime, <c>System32\OpenSSH</c>, and the <c>usr\bin</c> and <c>bin</c> folders of
/// a Git installation under Program Files.
/// </returns>
private static IReadOnlyList<string> WellKnownPaths()
{
    var windowsDir = Environment.GetFolderPath(Environment.SpecialFolder.Windows);
    var system32 = Path.Combine(windowsDir, "System32");
    var systemDir = Environment.GetFolderPath(Environment.SpecialFolder.System);
    var programFiles = Environment.GetFolderPath(Environment.SpecialFolder.ProgramFiles);

    var openSsh = Path.Combine(system32, "OpenSSH");
    var gitUsrBin = Path.Combine(programFiles, "Git", "usr", "bin");
    var gitBin = Path.Combine(programFiles, "Git", "bin");

    return [system32, systemDir, openSsh, gitUsrBin, gitBin];
}
|
||||
|
||||
// ── Path helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
/// <summary>
/// Expands a leading home-directory shorthand to the current user's profile directory.
/// </summary>
/// <param name="path">Path that may start with <c>~</c>.</param>
/// <returns>
/// The profile directory for <c>"~"</c>; the profile directory followed by the remainder
/// for <c>"~/…"</c> or <c>"~\…"</c>; otherwise <paramref name="path"/> unchanged.
/// </returns>
private static string ExpandTilde(string path)
{
    if (path.Length == 0 || path[0] != '~') return path;

    // Only a lone "~" or "~" followed by a separator means "home". A name that merely
    // starts with '~' (e.g. "~$doc.exe", "~other\bin") is a different file; gluing it onto
    // the profile path would silently resolve to the wrong location.
    if (path.Length > 1 && path[1] is not ('/' or '\\')) return path;

    var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
    return path.Length == 1 ? home : home + path[1..];
}
|
||||
|
||||
// Paths with ':' outside the volume-separator position are rejected (ADS, non-standard forms).
// Research doc 04 section 3 / S3.
//
// Returns true when `path` contains a colon anywhere other than directly after a single
// ASCII drive letter at the start of the path (an optional leading \\?\ prefix is ignored).
private static bool HasNonStandardColon(string path)
{
    // Extended-length prefix — strip it and evaluate the remainder (\\?\C:\ is valid).
    var effective = path.StartsWith(@"\\?\", StringComparison.Ordinal) ? path[4..] : path;

    // Paths still starting with \\ (UNC \\server\share, device \\.\…) have no drive-letter
    // position, so any colon in them is non-standard — e.g. an alternate data stream such as
    // \\server\share\tool.exe:stream. Returning false unconditionally here would let those
    // bypass the check. (\\?\UNC\… has lost its \\ prefix above and is handled below, where
    // any colon past index 1 is rejected.)
    if (effective.StartsWith(@"\\", StringComparison.Ordinal))
        return effective.Contains(':');

    var colonIdx = effective.IndexOf(':');
    if (colonIdx < 0) return false; // no colon — fine

    // Drive-letter form: single ASCII letter at index 0 followed by ':' — fine if no second colon.
    // '1', '!' etc. at index 0 are not valid drive letters and must be rejected.
    if (colonIdx == 1 && char.IsAsciiLetter(effective[0]))
        return effective.IndexOf(':', 2) >= 0;

    return true;
}
|
||||
|
||||
// Best-effort canonicalization of a path that PATH search has already found on disk.
// Only `.` / `..` segments are resolved (via Path.GetFullPath); 8.3 short names are NOT
// expanded — a GetLongPathName P/Invoke is tracked as OQ-R1 in the research docs
// (research doc 04, canonicalization / 8.3 short names).
// Never throws: if normalization fails the input is returned unchanged, so a hostile
// path cannot abort resolution.
private static string TryNormalizePath(string path)
{
    string normalized;
    try
    {
        normalized = Path.GetFullPath(path);
    }
    catch
    {
        normalized = path;
    }

    return normalized;
}
|
||||
}
|
||||
@ -1,28 +0,0 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Helpers for classifying an argv token by the executable name it refers to.
internal static class ExecCommandToken
{
    // Returns the lowercased final path component of a token. Both '\' and '/' are treated
    // as separators. Any extension is kept. A blank token yields ""; a token with no final
    // component (e.g. one ending in a separator) is returned trimmed and lowercased as-is.
    internal static string BasenameLower(string token)
    {
        var text = token.Trim();
        if (text.Length == 0) return string.Empty;

        var leaf = Path.GetFileName(text.Replace('\\', '/'));
        return (leaf.Length == 0 ? text : leaf).ToLowerInvariant();
    }

    // Same as BasenameLower, with a trailing ".exe" removed.
    internal static string NormalizedBasename(string token)
    {
        const string ExeSuffix = ".exe";

        var lowered = BasenameLower(token);
        if (!lowered.EndsWith(ExeSuffix, StringComparison.OrdinalIgnoreCase)) return lowered;
        return lowered[..^ExeSuffix.Length];
    }

    // True when the token names the `env` utility (any directory, optional .exe suffix).
    internal static bool IsEnv(string token)
    {
        return NormalizedBasename(token) == "env";
    }
}
|
||||
@ -1,100 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Strips `env [OPTIONS] [VAR=VAL...] COMMAND [ARGS...]` so the true executable can be resolved.
// Fail-closed: returns null when any unknown flag is encountered or the command cannot be safely
// unwrapped. Mirrors ExecEnvInvocationUnwrapper in the windows-app reference.
internal static class ExecEnvInvocationUnwrapper
{
    // Upper bound on how many nested `env` wrappers are peeled off.
    internal const int MaxWrapperDepth = 4;

    // Matches a leading NAME= environment assignment.
    private static readonly Regex s_envAssignment =
        new(@"^[A-Za-z_][A-Za-z0-9_]*=", RegexOptions.Compiled);

    // Strips one level of `env` wrapper. command[0] is assumed to be the `env` token itself
    // and is never inspected. Returns the argv that starts at the real COMMAND token, or
    // null when an unknown option is seen or no command token remains.
    internal static IReadOnlyList<string>? Unwrap(IReadOnlyList<string> command)
    {
        var index = 1;
        var pendingValue = false;

        for (; index < command.Count; index++)
        {
            var arg = command[index].Trim();

            // Blank tokens are ignored, even while an option value is pending.
            if (arg.Length == 0) continue;

            // This token is the value of the preceding option.
            if (pendingValue)
            {
                pendingValue = false;
                continue;
            }

            // `--` / `-` are consumed; whatever follows is the command.
            if (arg is "--" or "-")
            {
                index++;
                break;
            }

            if (s_envAssignment.IsMatch(arg)) continue;

            // First token that is neither an assignment nor an option: the executable.
            if (!arg.StartsWith('-')) break;

            var lowered = arg.ToLowerInvariant();
            var optionName = lowered.Split('=', 2)[0];

            if (ExecEnvOptions.FlagOnly.Contains(optionName)) continue;

            if (ExecEnvOptions.WithValue.Contains(optionName))
            {
                // Without an inline `=value`, the next token is the option's value.
                pendingValue = !lowered.Contains('=');
                continue;
            }

            if (ExecEnvOptions.InlineValuePrefixes.Any(p => lowered.StartsWith(p, StringComparison.Ordinal)))
                continue;

            return null; // Unknown flag — fail-closed.
        }

        return index < command.Count ? command.Skip(index).ToList() : null;
    }

    // Returns true when the env invocation has flags or VAR=val assignments before the command.
    // Only the first non-blank token after `env` decides:
    //   `--` ends option processing without modifying the environment → not a modifier.
    //   `-` alone, any other `-…` option, or a VAR=val assignment → modifier.
    //   anything else is the command itself → not a modifier.
    internal static bool HasModifiers(IReadOnlyList<string> command)
    {
        var first = command
            .Skip(1)
            .Select(t => t.Trim())
            .FirstOrDefault(t => t.Length > 0);

        if (first is null || first == "--") return false;
        return first.StartsWith('-') || s_envAssignment.IsMatch(first);
    }

    // Iteratively strips env wrappers for executable resolution only. Stops after
    // MaxWrapperDepth levels, when the head token is blank or not `env`, or when Unwrap
    // cannot produce a non-empty remainder; the argv reached at that point is returned.
    internal static IReadOnlyList<string> UnwrapForResolution(IReadOnlyList<string> command)
    {
        var remaining = command;
        var depth = 0;

        while (depth++ < MaxWrapperDepth && remaining.Count > 0)
        {
            var head = remaining[0].Trim();
            if (head.Length == 0 || !ExecCommandToken.IsEnv(head)) break;

            var inner = Unwrap(remaining);
            if (inner is not { Count: > 0 }) break;

            remaining = inner;
        }

        return remaining;
    }
}
|
||||
@ -1,38 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Option tables for the `env` command, consumed when stripping an env wrapper.
// Mirrors the constants in the windows-app reference (ExecEnvOptions.cs).
// The sets compare ordinally (case-sensitive); every entry is lowercase.
internal static class ExecEnvOptions
{
    // Options whose value is the next argument, or is attached inline after '='.
    internal static readonly HashSet<string> WithValue =
        new HashSet<string>(System.StringComparer.Ordinal)
        {
            "-u",
            "--unset",
            "-c",
            "--chdir",
            "-s",
            "--split-string",
            "--default-signal",
            "--ignore-signal",
            "--block-signal",
        };

    // Standalone switches that never take a value.
    internal static readonly HashSet<string> FlagOnly =
        new HashSet<string>(System.StringComparer.Ordinal)
        {
            "-i",
            "--ignore-environment",
            "-0",
            "--null",
        };

    // Prefixes that identify an option with its value attached (e.g. `-uFOO`, `--unset=FOO`).
    internal static readonly IReadOnlyList<string> InlineValuePrefixes =
    [
        "-u",
        "-c",
        "-s",
        "--unset=",
        "--chdir=",
        "--split-string=",
        "--default-signal=",
        "--ignore-signal=",
        "--block-signal=",
    ];
}
|
||||
@ -1,118 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
// Single-level shell wrapper detection for the V2 exec approval pipeline.
// Differs from the legacy ExecShellWrapperParser.Expand (BFS multi-level, string-based).
// This normalizer operates on argv (IReadOnlyList<string>) and performs one level of
// wrapper detection, unwrapping leading env prefixes up to MaxWrapperDepth.
// Rail 18 step 2: normalize command form.
internal static class ExecShellWrapperNormalizer
{
    private enum WrapperKind { Posix, Cmd, PowerShell }

    // Flags that introduce an inline script for POSIX-style shells (must be argv[1]).
    private static readonly HashSet<string> s_posixInlineFlags =
        new(StringComparer.OrdinalIgnoreCase) { "-lc", "-c", "--command" };

    // Flags that introduce an inline script for PowerShell (may appear anywhere before `--`).
    private static readonly HashSet<string> s_powerShellInlineFlags =
        new(StringComparer.OrdinalIgnoreCase) { "-c", "-command", "--command" };

    // Recognized shell names (matched against the normalized basename of argv[0]).
    private static readonly Dictionary<string, WrapperKind> s_wrapperKinds =
        new(StringComparer.OrdinalIgnoreCase)
        {
            ["ash"] = WrapperKind.Posix,
            ["sh"] = WrapperKind.Posix,
            ["bash"] = WrapperKind.Posix,
            ["zsh"] = WrapperKind.Posix,
            ["dash"] = WrapperKind.Posix,
            ["ksh"] = WrapperKind.Posix,
            ["fish"] = WrapperKind.Posix,
            ["cmd"] = WrapperKind.Cmd,
            ["cmd.exe"] = WrapperKind.Cmd,
            ["powershell"] = WrapperKind.PowerShell,
            ["powershell.exe"] = WrapperKind.PowerShell,
            ["pwsh"] = WrapperKind.PowerShell,
            ["pwsh.exe"] = WrapperKind.PowerShell,
        };

    // Result of wrapper detection: whether argv is a shell wrapper and, if so, its inline script.
    internal sealed record ParsedWrapper(bool IsWrapper, string? InlineCommand);

    // Shared "not a wrapper" result.
    internal static readonly ParsedWrapper NotWrapper = new(false, null);

    // Detects a single-level shell wrapper in argv.
    // rawCommand is always null in Windows v1 (not in system.run protocol; research doc 05 OQ-V4).
    // Detection is on argv only; rawCommand is accepted for API compatibility with future use.
    internal static ParsedWrapper Extract(IReadOnlyList<string> command, string? rawCommand = null)
    {
        var current = command;

        // Each pass either peels one env prefix or reaches a verdict; giving up after
        // MaxWrapperDepth passes yields NotWrapper.
        for (var depth = 0; depth < ExecEnvInvocationUnwrapper.MaxWrapperDepth; depth++)
        {
            if (current.Count == 0) return NotWrapper;

            var head = current[0].Trim();
            if (head.Length == 0) return NotWrapper;

            if (ExecCommandToken.IsEnv(head))
            {
                var inner = ExecEnvInvocationUnwrapper.Unwrap(current);
                if (inner is null) return NotWrapper;
                current = inner;
                continue;
            }

            if (!s_wrapperKinds.TryGetValue(ExecCommandToken.NormalizedBasename(head), out var kind))
                return NotWrapper;

            var script = kind switch
            {
                WrapperKind.Posix => ExtractPosixPayload(current),
                WrapperKind.Cmd => ExtractCmdPayload(current),
                WrapperKind.PowerShell => ExtractPowerShellPayload(current),
                _ => null,
            };

            return script is null ? NotWrapper : new ParsedWrapper(true, script);
        }

        return NotWrapper;
    }

    // POSIX shells: argv[1] must be an inline flag and argv[2] (trimmed) is the script.
    private static string? ExtractPosixPayload(IReadOnlyList<string> command)
    {
        if (command.Count < 2) return null;
        if (!s_posixInlineFlags.Contains(command[1].Trim())) return null;
        if (command.Count < 3) return null;

        var script = command[2].Trim();
        return script.Length > 0 ? script : null;
    }

    // cmd: everything after the first `/c` switch, joined with single spaces.
    private static string? ExtractCmdPayload(IReadOnlyList<string> command)
    {
        var switchIndex = -1;
        for (var i = 1; i < command.Count && switchIndex < 0; i++)
        {
            if (command[i].Trim().Equals("/c", StringComparison.OrdinalIgnoreCase))
                switchIndex = i;
        }

        if (switchIndex < 0) return null;

        var rest = string.Join(" ", command.Skip(switchIndex + 1)).Trim();
        return rest.Length == 0 ? null : rest;
    }

    // PowerShell: the single argument following the first inline flag seen before `--`.
    private static string? ExtractPowerShellPayload(IReadOnlyList<string> command)
    {
        for (var i = 1; i < command.Count; i++)
        {
            var arg = command[i].Trim().ToLowerInvariant();
            if (arg.Length == 0) continue;
            if (arg == "--") return null;
            if (!s_powerShellInlineFlags.Contains(arg)) continue;

            if (i + 1 >= command.Count) return null;

            var script = command[i + 1].Trim();
            return script.Length == 0 ? null : script;
        }

        return null;
    }
}
|
||||
@ -1,62 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace OpenClaw.Shared.ExecApprovals;
|
||||
|
||||
/// <summary>
|
||||
/// Structurally-valid system.run input produced by ExecApprovalV2InputValidator.
|
||||
/// Argv is guaranteed non-empty with a non-blank first element.
|
||||
/// </summary>
|
||||
public sealed class ValidatedRunRequest
{
    // Instances are created only inside this assembly; the constructor stores its
    // arguments as-is and performs no checks of its own.
    internal ValidatedRunRequest(
        string[] argv,
        string? shell,
        string? cwd,
        int timeoutMs,
        IReadOnlyDictionary<string, string>? env,
        string? agentId,
        string? sessionKey)
    {
        (Argv, Shell, Cwd, TimeoutMs) = (argv, shell, cwd, timeoutMs);
        (Env, AgentId, SessionKey) = (env, agentId, sessionKey);
    }

    // Command and arguments to run.
    public string[] Argv { get; }

    // Optional shell name supplied with the request.
    public string? Shell { get; }

    // Optional working directory.
    public string? Cwd { get; }

    // Timeout in milliseconds.
    public int TimeoutMs { get; }

    // Optional environment variables supplied with the request.
    public IReadOnlyDictionary<string, string>? Env { get; }

    // Optional identifier of the requesting agent.
    public string? AgentId { get; }

    // Optional session key of the request.
    public string? SessionKey { get; }
}
|
||||
|
||||
/// <summary>
|
||||
/// Either a ValidatedRunRequest (IsValid=true) or a typed denial (IsValid=false).
|
||||
/// Produced by ExecApprovalV2InputValidator; consumed by the coordinator pipeline.
|
||||
/// </summary>
|
||||
public sealed class ExecApprovalV2ValidationOutcome
{
    // Single private constructor; callers go through Ok / Fail.
    private ExecApprovalV2ValidationOutcome(
        bool isValid,
        ValidatedRunRequest? request,
        ExecApprovalV2Result? error)
    {
        IsValid = isValid;
        Request = request;
        Error = error;
    }

    // True for outcomes created by Ok, false for outcomes created by Fail.
    public bool IsValid { get; }

    // The validated request; set by Ok, null for outcomes created by Fail.
    public ValidatedRunRequest? Request { get; }

    // The denial; set by Fail, null for outcomes created by Ok.
    public ExecApprovalV2Result? Error { get; }

    // Wraps a validated request in a successful outcome.
    public static ExecApprovalV2ValidationOutcome Ok(ValidatedRunRequest r)
    {
        return new ExecApprovalV2ValidationOutcome(true, r, null);
    }

    // Wraps a denial in a failed outcome.
    public static ExecApprovalV2ValidationOutcome Fail(ExecApprovalV2Result e)
    {
        return new ExecApprovalV2ValidationOutcome(false, null, e);
    }
}
|
||||
@ -43,15 +43,7 @@ internal static class ExecEnvSanitizer
|
||||
"LD_LIBRARY_PATH",
|
||||
"LD_AUDIT",
|
||||
"DYLD_INSERT_LIBRARIES",
|
||||
"DYLD_LIBRARY_PATH",
|
||||
"AWS_ACCESS_KEY_ID",
|
||||
"AWS_SECRET_ACCESS_KEY",
|
||||
"AWS_SESSION_TOKEN",
|
||||
"AZURE_CLIENT_SECRET",
|
||||
"GITHUB_TOKEN",
|
||||
"GH_TOKEN",
|
||||
"NPM_TOKEN",
|
||||
"OPENAI_API_KEY"
|
||||
"DYLD_LIBRARY_PATH"
|
||||
}.ToFrozenSet(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
internal static ExecEnvSanitizeResult Sanitize(Dictionary<string, string>? env)
|
||||
@ -90,84 +82,14 @@ internal static class ExecEnvSanitizer
|
||||
if (name.IndexOfAny(['=', '\0', '\r', '\n']) >= 0)
|
||||
return true;
|
||||
|
||||
// Vectorized scan: any char in [0x00, 0x20] covers all ASCII control characters
|
||||
// (0x01–0x1F) plus space (0x20) in a single SIMD pass — the common fast path for
|
||||
// the ASCII-only names that make up virtually all environment variable keys.
|
||||
var span = name.AsSpan();
|
||||
if (span.IndexOfAnyInRange('\x00', '\x20') >= 0)
|
||||
return true;
|
||||
// DEL (0x7F) — control char outside the range above.
|
||||
if (span.IndexOf('\x7F') >= 0)
|
||||
return true;
|
||||
// Non-ASCII Unicode control / whitespace (rare; UTF-8 env var names are uncommon).
|
||||
for (var i = 0; i < name.Length; i++)
|
||||
foreach (var c in name)
|
||||
{
|
||||
var c = name[i];
|
||||
if (c > '\x7F' && (char.IsControl(c) || char.IsWhiteSpace(c)))
|
||||
if (char.IsControl(c) || char.IsWhiteSpace(c))
|
||||
return true;
|
||||
}
|
||||
|
||||
return _blockedNames.Contains(name)
|
||||
|| HasCredentialMarker(name)
|
||||
|| name.StartsWith("LD_", StringComparison.OrdinalIgnoreCase)
|
||||
|| name.StartsWith("DYLD_", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
// Returns true when the variable name looks like it carries a credential. Three kinds of
// marker are recognized (all case-insensitive):
//   - a whole segment: TOKEN, SECRET, PASSWORD, PASSWD, CREDENTIAL, CREDENTIALS
//   - two adjacent segments: API KEY, ACCESS KEY, PRIVATE KEY, CLIENT SECRET, CONNECTION STRING
//   - the substring CONNSTR anywhere in the name
private static bool HasCredentialMarker(string name)
{
    if (HasSegment(name, "TOKEN") || HasSegment(name, "SECRET"))
        return true;

    if (HasSegment(name, "PASSWORD") || HasSegment(name, "PASSWD"))
        return true;

    if (HasCompoundMarker(name, "API", "KEY")
        || HasCompoundMarker(name, "ACCESS", "KEY")
        || HasCompoundMarker(name, "PRIVATE", "KEY"))
    {
        return true;
    }

    if (HasCompoundMarker(name, "CLIENT", "SECRET") || HasCompoundMarker(name, "CONNECTION", "STRING"))
        return true;

    if (HasSegment(name, "CREDENTIAL") || HasSegment(name, "CREDENTIALS"))
        return true;

    return name.Contains("CONNSTR", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
// Returns true when `name`, split into segments on '_', '-' and '.', contains a segment
// equal to `first` immediately followed by a segment equal to `second` (case-insensitive).
// Empty segments count, so "API__KEY" does not match ("API", "KEY").
private static bool HasCompoundMarker(string name, string first, string second)
{
    var rest = name.AsSpan();
    var previousWasFirst = false;

    while (true)
    {
        var cut = rest.IndexOfAny('_', '-', '.');
        var segment = cut < 0 ? rest : rest[..cut];

        if (previousWasFirst && segment.Equals(second.AsSpan(), StringComparison.OrdinalIgnoreCase))
            return true;

        // No separator left: that was the final segment.
        if (cut < 0)
            return false;

        previousWasFirst = segment.Equals(first.AsSpan(), StringComparison.OrdinalIgnoreCase);
        rest = rest[(cut + 1)..];
    }
}
|
||||
|
||||
// Returns true when `name`, split into segments on '_', '-' and '.', contains a segment
// equal to `segment` (case-insensitive). Substrings inside a longer segment do not count.
private static bool HasSegment(string name, string segment)
{
    var wanted = segment.AsSpan();
    var rest = name.AsSpan();

    while (true)
    {
        var cut = rest.IndexOfAny('_', '-', '.');
        var current = cut < 0 ? rest : rest[..cut];

        if (current.Equals(wanted, StringComparison.OrdinalIgnoreCase))
            return true;

        // No separator left: that was the final segment.
        if (cut < 0)
            return false;

        rest = rest[(cut + 1)..];
    }
}
|
||||
}
|
||||
|
||||
@ -135,26 +135,8 @@ internal static class ExecShellWrapperParser
|
||||
for (var i = 1; i < tokens.Length; i++)
|
||||
{
|
||||
var option = tokens[i];
|
||||
|
||||
// Check for inline separator form first: -flag:value or -flag=value
|
||||
var sepIdx = IndexOfFlagSeparator(option);
|
||||
if (sepIdx > 0)
|
||||
{
|
||||
var flagPart = option[..sepIdx];
|
||||
var valuePart = option[(sepIdx + 1)..];
|
||||
|
||||
if (IsCommandFlag(flagPart))
|
||||
{
|
||||
return string.IsNullOrWhiteSpace(valuePart)
|
||||
? ("", shell, "Shell wrapper payload was empty")
|
||||
: (valuePart, shell, null);
|
||||
}
|
||||
|
||||
if (IsEncodedCommandFlag(flagPart))
|
||||
return DecodeEncodedPayload(valuePart, shell);
|
||||
}
|
||||
|
||||
if (IsCommandFlag(option))
|
||||
if (option.Equals("-Command", StringComparison.OrdinalIgnoreCase) ||
|
||||
option.Equals("-c", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var payload = string.Join(" ", tokens, i + 1, tokens.Length - i - 1).Trim();
|
||||
return string.IsNullOrWhiteSpace(payload)
|
||||
@ -162,68 +144,32 @@ internal static class ExecShellWrapperParser
|
||||
: (payload, shell, null);
|
||||
}
|
||||
|
||||
if (IsEncodedCommandFlag(option))
|
||||
if (option.Equals("-EncodedCommand", StringComparison.OrdinalIgnoreCase) ||
|
||||
option.Equals("-enc", StringComparison.OrdinalIgnoreCase) ||
|
||||
option.Equals("-ec", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
var encoded = i + 1 < tokens.Length ? tokens[i + 1] : null;
|
||||
return DecodeEncodedPayload(encoded, shell);
|
||||
if (string.IsNullOrWhiteSpace(encoded))
|
||||
return ("", shell, "Shell wrapper payload was empty");
|
||||
|
||||
try
|
||||
{
|
||||
var bytes = Convert.FromBase64String(encoded);
|
||||
var payload = Encoding.Unicode.GetString(bytes).Trim();
|
||||
return string.IsNullOrWhiteSpace(payload)
|
||||
? ("", shell, "EncodedCommand decoded to an empty payload")
|
||||
: (payload, shell, null);
|
||||
}
|
||||
catch (FormatException)
|
||||
{
|
||||
return ("", shell, "EncodedCommand could not be decoded");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return default;
|
||||
}
|
||||
|
||||
// Returns the index of the first ':' or '=' in a flag token, ignoring position 0
// (the leading '-'); -1 when there is none.
private static int IndexOfFlagSeparator(string token)
{
    // Guard: IndexOfAny rejects a start index beyond the end of the string.
    if (token.Length < 2)
        return -1;

    return token.IndexOfAny(new[] { ':', '=' }, 1);
}
|
||||
|
||||
// True for the PowerShell inline-script flag: `-Command` or its `-c` alias, in any casing.
private static bool IsCommandFlag(string flag)
{
    if (flag.Equals("-Command", StringComparison.OrdinalIgnoreCase))
        return true;

    return flag.Equals("-c", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
// True when `flag` selects PowerShell's -EncodedCommand: the `-e` / `-ec` aliases, or any
// prefix of `-EncodedCommand` from `-en` up to the full name (case-insensitive).
// Windows PowerShell accepts -e as EncodedCommand despite the apparent ambiguity with
// -ExecutionPolicy, so the parser must fail closed and decode it.
private static bool IsEncodedCommandFlag(string flag)
{
    const string FullFlag = "-encodedcommand";

    var isShortAlias =
        flag.Equals("-e", StringComparison.OrdinalIgnoreCase) ||
        flag.Equals("-ec", StringComparison.OrdinalIgnoreCase);
    if (isShortAlias)
        return true;

    // Shorter than "-en" or longer than the full name can never be a valid abbreviation.
    if (flag.Length < 3 || flag.Length > FullFlag.Length)
        return false;

    return FullFlag.StartsWith(flag, StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
// Decodes a PowerShell -EncodedCommand argument (Base64 of UTF-16LE text).
// Returns (payload, shell, null) on success. On failure the payload is "" and Error
// explains why: the argument was blank, was not valid Base64, or decoded to blank text.
private static (string? Payload, string? Shell, string? Error) DecodeEncodedPayload(string? encoded, string shell)
{
    if (string.IsNullOrWhiteSpace(encoded))
        return ("", shell, "Shell wrapper payload was empty");

    byte[] raw;
    try
    {
        raw = Convert.FromBase64String(encoded);
    }
    catch (FormatException)
    {
        return ("", shell, "EncodedCommand could not be decoded");
    }

    var script = Encoding.Unicode.GetString(raw).Trim();
    if (string.IsNullOrWhiteSpace(script))
        return ("", shell, "EncodedCommand decoded to an empty payload");

    return (script, shell, null);
}
|
||||
|
||||
private static List<string> SplitTopLevelCommands(string command)
|
||||
{
|
||||
var parts = new List<string>();
|
||||
|
||||
@ -1,224 +0,0 @@
|
||||
using System.Net;
|
||||
using System.Net.Sockets;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
// Security zone assigned to a URL. Values 0–4 are cast directly from the integer zone
// reported by IInternetSecurityManager.MapUrlToZone, so the numeric values must not change.
public enum HttpUrlSecurityZone
{
    // Zone could not be determined: not running on Windows, the security manager could
    // not be created, the call failed, or it returned a value not defined here.
    Unknown = -1,
    LocalMachine = 0,
    Intranet = 1,
    Trusted = 2,
    Internet = 3,
    Restricted = 4,
}
|
||||
|
||||
// Result of HttpUrlRiskEvaluator.Evaluate for one URL.
//   CanonicalUrl         — the parsed URI's AbsoluteUri.
//   CanonicalOrigin      — scheme + authority, always ending in '/'.
//   HostKey              — the URI authority, lowercased.
//   Zone                 — zone reported by the platform, or Unknown.
//   RequiresConfirmation — true exactly when Reasons is non-empty.
//   Reasons              — de-duplicated (case-insensitive) human-readable risk reasons.
public sealed record HttpUrlRiskProfile(
    string CanonicalUrl,
    string CanonicalOrigin,
    string HostKey,
    HttpUrlSecurityZone Zone,
    bool RequiresConfirmation,
    IReadOnlyList<string> Reasons);
|
||||
|
||||
/// <summary>
|
||||
/// Centralized risk classifier for agent-supplied HTTP URLs. Callers should run
|
||||
/// <see cref="HttpUrlValidator"/> first; this type decides whether an otherwise
|
||||
/// valid URL needs user confirmation before browser navigation or media handoff.
|
||||
/// </summary>
|
||||
public static class HttpUrlRiskEvaluator
|
||||
{
|
||||
public static HttpUrlRiskProfile Evaluate(string canonicalUrl)
|
||||
{
|
||||
if (!Uri.TryCreate(canonicalUrl, UriKind.Absolute, out var uri))
|
||||
throw new ArgumentException("URL must be an absolute URI", nameof(canonicalUrl));
|
||||
|
||||
var reasons = new List<string>();
|
||||
var host = uri.Host;
|
||||
|
||||
if (!string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase))
|
||||
reasons.Add("URL does not use HTTPS");
|
||||
|
||||
// Homograph defense: a Unicode hostname that round-trips to a different
|
||||
// ASCII (Punycode) form is suspicious — `аpple.com` (Cyrillic 'а') and
|
||||
// `apple.com` are visually identical but resolve differently. Always
|
||||
// surface the mismatch as a Reason so the prompt fires for IDN hosts.
|
||||
if (!string.Equals(uri.Host, uri.IdnHost, StringComparison.Ordinal))
|
||||
reasons.Add($"Hostname is internationalized; punycode form is '{uri.IdnHost}'");
|
||||
|
||||
if (string.Equals(host, "localhost", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
reasons.Add("Host is localhost");
|
||||
}
|
||||
else if (IPAddress.TryParse(host, out var ip))
|
||||
{
|
||||
reasons.Add("Host is an IP literal");
|
||||
AddAddressRiskReasons(ip, reasons);
|
||||
}
|
||||
else if (!host.Contains('.', StringComparison.Ordinal))
|
||||
{
|
||||
reasons.Add("Host has no dot and may resolve on the local intranet");
|
||||
}
|
||||
|
||||
var zone = MapUrlToZone(canonicalUrl);
|
||||
switch (zone)
|
||||
{
|
||||
case HttpUrlSecurityZone.LocalMachine:
|
||||
reasons.Add("Windows classifies this URL as Local Machine zone");
|
||||
break;
|
||||
case HttpUrlSecurityZone.Intranet:
|
||||
reasons.Add("Windows classifies this URL as Intranet zone");
|
||||
break;
|
||||
case HttpUrlSecurityZone.Restricted:
|
||||
reasons.Add("Windows classifies this URL as Restricted zone");
|
||||
break;
|
||||
}
|
||||
|
||||
var distinctReasons = reasons
|
||||
.Distinct(StringComparer.OrdinalIgnoreCase)
|
||||
.ToArray();
|
||||
|
||||
return new HttpUrlRiskProfile(
|
||||
uri.AbsoluteUri,
|
||||
GetCanonicalOrigin(uri),
|
||||
uri.Authority.ToLowerInvariant(),
|
||||
zone,
|
||||
distinctReasons.Length > 0,
|
||||
distinctReasons);
|
||||
}
|
||||
|
||||
public static bool IsPublicAddress(IPAddress ip)
|
||||
{
|
||||
if (IPAddress.IsLoopback(ip)) return false;
|
||||
if (ip.IsIPv4MappedToIPv6) return IsPublicAddress(ip.MapToIPv4());
|
||||
|
||||
if (ip.AddressFamily == AddressFamily.InterNetwork)
|
||||
{
|
||||
var b = ip.GetAddressBytes();
|
||||
if (b[0] == 0) return false;
|
||||
if (b[0] == 10) return false;
|
||||
if (b[0] == 100 && (b[1] & 0xC0) == 64) return false;
|
||||
if (b[0] == 127) return false;
|
||||
if (b[0] == 169 && b[1] == 254) return false;
|
||||
if (b[0] == 172 && b[1] >= 16 && b[1] <= 31) return false;
|
||||
if (b[0] == 192 && b[1] == 168) return false;
|
||||
if (b[0] >= 224) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ip.AddressFamily == AddressFamily.InterNetworkV6)
|
||||
{
|
||||
// Unspecified (::) — never a routable destination.
|
||||
if (ip.Equals(IPAddress.IPv6None)) return false;
|
||||
if (ip.IsIPv6LinkLocal) return false;
|
||||
if (ip.IsIPv6SiteLocal) return false;
|
||||
if (ip.IsIPv6Multicast) return false;
|
||||
var b = ip.GetAddressBytes();
|
||||
// Unique-local fc00::/7 (existing).
|
||||
if ((b[0] & 0xFE) == 0xFC) return false;
|
||||
// ::ffff:0:0/96 — IPv4-mapped (caught above by IsIPv4MappedToIPv6,
|
||||
// but defensively re-check).
|
||||
if (b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] == 0 &&
|
||||
b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0 &&
|
||||
b[8] == 0 && b[9] == 0 && b[10] == 0xFF && b[11] == 0xFF)
|
||||
{
|
||||
var mapped = new IPAddress(new[] { b[12], b[13], b[14], b[15] });
|
||||
return IsPublicAddress(mapped);
|
||||
}
|
||||
// ::/96 IPv4-compatible (deprecated; treat as non-public — never legit
|
||||
// for an agent-supplied URL).
|
||||
if (b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] == 0 &&
|
||||
b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0 &&
|
||||
b[8] == 0 && b[9] == 0 && b[10] == 0 && b[11] == 0)
|
||||
return false;
|
||||
// 2001:db8::/32 — documentation prefix (RFC 3849).
|
||||
if (b[0] == 0x20 && b[1] == 0x01 && b[2] == 0x0D && b[3] == 0xB8) return false;
|
||||
// 2001:0000::/32 — Teredo (relay tunneling; not a normal target).
|
||||
if (b[0] == 0x20 && b[1] == 0x01 && b[2] == 0x00 && b[3] == 0x00) return false;
|
||||
// 100::/64 — discard-only address block (RFC 6666).
|
||||
if (b[0] == 0x01 && b[1] == 0x00 &&
|
||||
b[2] == 0 && b[3] == 0 && b[4] == 0 && b[5] == 0 && b[6] == 0 && b[7] == 0)
|
||||
return false;
|
||||
// 2002::/16 — 6to4. Block: payload destination is unverifiable.
|
||||
if (b[0] == 0x20 && b[1] == 0x02) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static void AddAddressRiskReasons(IPAddress ip, List<string> reasons)
|
||||
{
|
||||
if (IPAddress.IsLoopback(ip))
|
||||
{
|
||||
reasons.Add("Address is loopback");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!IsPublicAddress(ip))
|
||||
reasons.Add("Address is private, link-local, multicast, or reserved");
|
||||
}
|
||||
|
||||
private static string GetCanonicalOrigin(Uri uri)
|
||||
{
|
||||
var origin = uri.GetLeftPart(UriPartial.Authority);
|
||||
return origin.EndsWith("/", StringComparison.Ordinal) ? origin : origin + "/";
|
||||
}
|
||||
|
||||
/// <summary>
/// Asks the Windows Internet Security Manager COM object which security
/// zone <paramref name="url"/> maps to. Returns
/// <see cref="HttpUrlSecurityZone.Unknown"/> on non-Windows platforms, when
/// the COM class cannot be created, when the call reports a non-zero
/// HRESULT, when the reported zone is not a defined enum member, or when
/// anything throws.
/// </summary>
private static HttpUrlSecurityZone MapUrlToZone(string url)
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
    {
        return HttpUrlSecurityZone.Unknown;
    }

    IInternetSecurityManager? securityManager = null;
    try
    {
        var managerType = Type.GetTypeFromCLSID(
            new Guid("7b8a2d94-0ac9-11d1-896c-00c04fb6bfc4"),
            throwOnError: false);
        if (managerType == null)
        {
            return HttpUrlSecurityZone.Unknown;
        }

        securityManager = Activator.CreateInstance(managerType) as IInternetSecurityManager;
        if (securityManager == null)
        {
            return HttpUrlSecurityZone.Unknown;
        }

        var hresult = securityManager.MapUrlToZone(url, out var rawZone, 0);
        if (hresult != 0)
        {
            return HttpUrlSecurityZone.Unknown;
        }

        // Only surface zone values the enum actually declares.
        if (!Enum.IsDefined(typeof(HttpUrlSecurityZone), rawZone))
        {
            return HttpUrlSecurityZone.Unknown;
        }

        return (HttpUrlSecurityZone)rawZone;
    }
    catch
    {
        return HttpUrlSecurityZone.Unknown;
    }
    finally
    {
        // Release the COM reference; a failure here must not mask the result.
        if (securityManager != null)
        {
            try { Marshal.ReleaseComObject(securityManager); } catch { }
        }
    }
}
|
||||
|
||||
// Partial COM interop declaration used by MapUrlToZone above. Only the
// first three members are declared; MapUrlToZone is the only one called in
// this file. NOTE(review): for an IUnknown-based [ComImport] interface the
// declaration order is expected to mirror the native vtable order, so do not
// reorder or remove the leading members — confirm against the urlmon headers.
[ComImport]
|
||||
[Guid("79eac9ee-baf9-11ce-8c82-00aa004ba90b")]
|
||||
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
|
||||
private interface IInternetSecurityManager
|
||||
{
|
||||
// Declared but not called in this file.
[PreserveSig]
|
||||
int SetSecuritySite(IntPtr site);
|
||||
|
||||
// Declared but not called in this file.
[PreserveSig]
|
||||
int GetSecuritySite(out IntPtr site);
|
||||
|
||||
// Returns an HRESULT as a plain int ([PreserveSig]); the caller treats 0 as
// success and reads the zone from the out parameter.
[PreserveSig]
|
||||
int MapUrlToZone(
|
||||
[MarshalAs(UnmanagedType.LPWStr)] string url,
|
||||
out int zone,
|
||||
int flags);
|
||||
}
|
||||
}
|
||||
@ -1,67 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Strict validator for agent-supplied URLs that the node hands to a browser
/// via shell-execute. It is a defense-in-depth layer around
/// <c>canvas.navigate</c>: even if the gateway is expected to emit only
/// http(s), a misbehaving or compromised agent must not be able to make the
/// node shell-execute <c>file:</c>, <c>javascript:</c>, app-protocol URIs, or
/// credential-bearing URLs that visually masquerade as legitimate ones.
/// </summary>
public static class HttpUrlValidator
{
    /// <summary>
    /// Accepts <paramref name="raw"/> only when it is an absolute http/https
    /// URL with a non-empty host and no userinfo.
    /// </summary>
    /// <param name="raw">Candidate URL; surrounding whitespace is ignored.</param>
    /// <param name="canonical">
    /// On success, the re-serialized form (<see cref="Uri.AbsoluteUri"/>) that
    /// callers should hand to the OS instead of the raw input; otherwise null.
    /// </param>
    /// <param name="error">On failure, a short reason; otherwise null.</param>
    /// <returns>True when the URL passed every check.</returns>
    public static bool TryParse(string? raw, out string? canonical, out string? error)
    {
        canonical = null;
        error = FindRejection(raw, out var parsed);
        if (error is not null || parsed is null)
        {
            return false;
        }

        canonical = parsed.AbsoluteUri;
        return true;
    }

    /// <summary>
    /// Runs the checks in order and returns the first rejection reason, or
    /// null when <paramref name="raw"/> is acceptable (in which case
    /// <paramref name="uri"/> holds the parsed URI).
    /// </summary>
    private static string? FindRejection(string? raw, out Uri? uri)
    {
        uri = null;

        if (string.IsNullOrWhiteSpace(raw))
        {
            return "url is empty";
        }

        if (!Uri.TryCreate(raw.Trim(), UriKind.Absolute, out uri))
        {
            return "url is not an absolute URI";
        }

        // Uri lowercases the scheme while parsing, but comparing with
        // OrdinalIgnoreCase states the intent and does not depend on that.
        var isHttp = string.Equals(uri.Scheme, Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase);
        var isHttps = string.Equals(uri.Scheme, Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);
        if (!isHttp && !isHttps)
        {
            return $"scheme '{uri.Scheme}' is not allowed (only http/https)";
        }

        if (string.IsNullOrEmpty(uri.Host))
        {
            return "url has no host";
        }

        // https://attacker@evil.com is valid HTTP but a classic phishing
        // shape: the userinfo part reads like a host name to non-experts.
        if (!string.IsNullOrEmpty(uri.UserInfo))
        {
            return "url contains userinfo (user:password@) which is not allowed";
        }

        return null;
    }
}
|
||||
@ -1,27 +0,0 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
|
||||
/// Provider interface for platform-specific device status data collection.
|
||||
/// Each method returns an object that will be serialized to JSON.
|
||||
/// Implementations should handle their own error cases gracefully.
|
||||
/// </summary>
|
||||
public interface IDeviceStatusProvider : IDisposable
|
||||
{
|
||||
/// <summary>OS version, architecture, machine name, uptime.</summary>
/// <returns>An object intended for JSON serialization; its concrete shape is defined by the implementation.</returns>
|
||||
object GetOsInfo();
|
||||
|
||||
/// <summary>CPU name, logical processor count, usage percent (may be null during warm-up).</summary>
/// <returns>A task producing an object intended for JSON serialization.</returns>
|
||||
Task<object> GetCpuInfoAsync();
|
||||
|
||||
/// <summary>Total/available memory in bytes and usage percent.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetMemoryInfo();
|
||||
|
||||
/// <summary>Fixed drive info: name, label, total/free bytes, usage percent, format.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetDiskInfo();
|
||||
|
||||
/// <summary>Battery presence, charge level, charging state, estimated time remaining.</summary>
/// <returns>An object intended for JSON serialization.</returns>
|
||||
object GetBatteryInfo();
|
||||
}
|
||||
@ -1,25 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Shared literal-host classifier for gateway URLs that point at the local
/// machine. Only the host text is inspected; no name resolution happens.
/// </summary>
public static class LocalGatewayUrlClassifier
{
    /// <summary>
    /// Returns true when <paramref name="url"/> parses as an absolute URI
    /// whose host is literally <c>localhost</c>, <c>127.0.0.1</c> or the IPv6
    /// loopback (<c>::1</c>, with or without brackets). Blank or unparsable
    /// input yields false.
    /// </summary>
    public static bool IsLocalGatewayUrl(string url)
    {
        if (string.IsNullOrWhiteSpace(url))
        {
            return false;
        }

        if (!Uri.TryCreate(url, UriKind.Absolute, out var parsed))
        {
            return false;
        }

        return parsed.Host.ToLowerInvariant() switch
        {
            "localhost" or "127.0.0.1" or "::1" or "[::1]" => true,
            _ => false,
        };
    }
}
|
||||
@ -1,284 +0,0 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Runtime.Versioning;
|
||||
using System.Security.AccessControl;
|
||||
using System.Security.Cryptography;
|
||||
using System.Security.Principal;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
/// Manages the MCP server's bearer token.
///
/// The token lives next to the rest of the tray's settings, at
/// <c>%APPDATA%\OpenClawTray\mcp-token.txt</c> (the exact path is composed by
/// the tray from <c>SettingsManager.SettingsDirectoryPath</c> and surfaced as
/// <c>NodeService.McpTokenPath</c> — that's the source of truth, not anything
/// in this file). Co-locating with settings means the test-suite override
/// <c>OPENCLAW_TRAY_DATA_DIR</c> isolates the token file too.
///
/// The token is **created lazily on first MCP server start** (i.e. the first
/// time the user enables Local MCP Server in Settings — until then the file
/// does not exist) and then **persists across tray restarts**. Local CLIs and
/// per-user agent registrations read the file and send the contents on every
/// request as <c>Authorization: Bearer &lt;contents&gt;</c>. Because those
/// readers are not necessarily .NET, the file is written as plain UTF-8
/// **without a byte-order mark**: the file bytes are exactly the token.
///
/// Defense in depth: on Windows the token file and its directory are given a
/// protected ACL (current user, SYSTEM, Administrators) on a best-effort
/// basis; the listener is bound to loopback so the endpoint is invisible to
/// other machines; and Origin/Host checks block browser cross-origin attacks.
/// The bearer is the last line of defense against an untrusted local process
/// on the same box.
/// </summary>
public static class McpAuthToken
{
    private const string FileName = "mcp-token.txt";

    /// <summary>
    /// UTF-8 without a BOM. <see cref="Encoding.UTF8"/> would prepend EF BB BF,
    /// which non-.NET readers would send as part of the bearer token.
    /// </summary>
    private static readonly UTF8Encoding Utf8NoBom = new(encoderShouldEmitUTF8Identifier: false);

    /// <summary>
    /// Fallback path used only when a caller doesn't supply one. The tray itself
    /// passes a path computed from <c>SettingsManager.SettingsDirectoryPath</c>
    /// (exposed as <c>NodeService.McpTokenPath</c>) so this constant is **not**
    /// the live location for OpenClaw Tray installations — it's only a default
    /// for non-tray consumers (CLIs, tests) that don't want to compute one.
    /// </summary>
    public static string DefaultPath
    {
        get
        {
            var root = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
            return Path.Combine(root, "OpenClaw", FileName);
        }
    }

    /// <summary>
    /// Load the token from <see cref="DefaultPath"/>, creating a fresh random
    /// one if the file does not exist. Returns the token string.
    /// </summary>
    public static string LoadOrCreate() => LoadOrCreate(DefaultPath);

    /// <summary>
    /// Load the token stored at <paramref name="path"/>, or create and persist
    /// a fresh one when the file is missing or empty.
    /// </summary>
    /// <returns>The existing (trimmed) token, or the newly generated one.</returns>
    /// <exception cref="IOException">
    /// The file exists but could not be read. The token is deliberately NOT
    /// regenerated in that case.
    /// </exception>
    public static string LoadOrCreate(string path)
    {
        // Distinguish "missing" (regenerate) from "unreadable" (throw). A
        // transient lock or AV scan must not silently rotate the token and
        // break every configured MCP client.
        if (File.Exists(path))
        {
            string existing;
            try
            {
                existing = File.ReadAllText(path).Trim();
            }
            catch (Exception ex)
            {
                throw new IOException(
                    $"MCP token file at '{path}' exists but could not be read: {ex.Message}. " +
                    "Refusing to regenerate (would invalidate all configured clients).", ex);
            }

            if (!string.IsNullOrEmpty(existing))
            {
                return existing;
            }

            // Empty file: treat as missing. The atomic write below replaces it.
        }

        return WriteFreshToken(path);
    }

    /// <summary>
    /// Unconditionally replace the token at <paramref name="path"/> with a
    /// freshly generated one and return it.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="path"/> is null or blank.</exception>
    public static string Reset(string path)
    {
        if (string.IsNullOrWhiteSpace(path))
        {
            throw new ArgumentException("Token path is required", nameof(path));
        }

        return WriteFreshToken(path);
    }

    /// <summary>Read the token without creating a new one. Returns null when missing.</summary>
    public static string? TryLoad(string? path = null)
    {
        path ??= DefaultPath;
        try
        {
            if (!File.Exists(path))
            {
                return null;
            }

            var token = File.ReadAllText(path).Trim();
            return string.IsNullOrEmpty(token) ? null : token;
        }
        catch
        {
            // Unreadable is reported the same way as missing/empty.
            return null;
        }
    }

    /// <summary>
    /// Verify that the token file at <paramref name="path"/> is owned by the
    /// current user and not readable by anyone outside (Owner, SYSTEM,
    /// Administrators). Returns null if the file looks fine; returns a
    /// human-readable warning otherwise so callers can log/toast at startup.
    /// On non-Windows or when the file does not exist, returns null.
    /// </summary>
    public static string? VerifyAcl(string path)
    {
        if (string.IsNullOrEmpty(path) || !File.Exists(path)) return null;
        if (!OperatingSystem.IsWindows()) return null;
        return VerifyFileAclWindows(path);
    }

    /// <summary>
    /// Best-effort: lock the supplied directory's ACL to current user + SYSTEM
    /// + Administrators with inheritance disabled. No-op on non-Windows.
    /// Callers should call this when the tray's data directory is created so
    /// other locally-installed apps under the same user can't read the token
    /// (or anything else we drop alongside it).
    /// </summary>
    public static void TryRestrictDataDirectoryAcl(string dir)
    {
        if (string.IsNullOrEmpty(dir)) return;
        if (!OperatingSystem.IsWindows()) return;
        try { RestrictDirectoryAclWindows(dir); }
        catch { /* best-effort; acl restriction is defense-in-depth, not load-bearing */ }
    }

    /// <summary>
    /// Best-effort: lock a single file's ACL to current user + SYSTEM +
    /// Administrators with inheritance disabled. No-op on non-Windows.
    /// </summary>
    public static void TryRestrictSensitiveFileAcl(string path)
    {
        if (string.IsNullOrEmpty(path)) return;
        if (!OperatingSystem.IsWindows()) return;
        try { RestrictFileAclWindows(path); }
        catch { /* best-effort; acl restriction is defense-in-depth, not load-bearing */ }
    }

    private static void TryRestrictDirectoryAcl(string dir) => TryRestrictDataDirectoryAcl(dir);

    /// <summary>
    /// Generates a token and persists it atomically: stage to a sibling temp
    /// file, lock its ACL, then rename over the target. Without the rename, a
    /// power-loss / process-kill mid-write could leave a zero-byte token file
    /// which the next <see cref="LoadOrCreate(string)"/> would treat as
    /// "missing" and overwrite — silently rotating the token.
    /// </summary>
    private static string WriteFreshToken(string path)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrEmpty(dir))
        {
            Directory.CreateDirectory(dir);
            TryRestrictDirectoryAcl(dir);
        }

        var token = Generate();
        var tempPath = Path.Combine(
            string.IsNullOrEmpty(dir) ? Environment.CurrentDirectory : dir,
            $".{Path.GetFileName(path)}.{Guid.NewGuid():N}.tmp");
        try
        {
            // No BOM: the file bytes must be exactly the token characters.
            File.WriteAllText(tempPath, token, Utf8NoBom);
            TryRestrictSensitiveFileAcl(tempPath);
            File.Move(tempPath, path, overwrite: true);
        }
        catch
        {
            // Don't leave a stray temp file containing a token behind.
            try { if (File.Exists(tempPath)) File.Delete(tempPath); } catch { }
            throw;
        }

        // Move on Windows preserves the source's DACL; re-apply defensively in
        // case a future rename strategy substitutes a different file.
        TryRestrictSensitiveFileAcl(path);
        return token;
    }

    [SupportedOSPlatform("windows")]
    private static void RestrictFileAclWindows(string path)
    {
        var info = new FileInfo(path);
        var sec = new FileSecurity();
        sec.SetAccessRuleProtection(isProtected: true, preserveInheritance: false);
        var owner = WindowsIdentity.GetCurrent().User;
        if (owner == null) return;
        sec.SetOwner(owner);
        sec.AddAccessRule(new FileSystemAccessRule(owner,
            FileSystemRights.FullControl, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null),
            FileSystemRights.FullControl, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null),
            FileSystemRights.FullControl, AccessControlType.Allow));
        info.SetAccessControl(sec);
    }

    [SupportedOSPlatform("windows")]
    private static void RestrictDirectoryAclWindows(string dir)
    {
        var info = new DirectoryInfo(dir);
        var sec = new DirectorySecurity();
        sec.SetAccessRuleProtection(isProtected: true, preserveInheritance: false);
        var owner = WindowsIdentity.GetCurrent().User;
        if (owner == null) return;
        sec.SetOwner(owner);
        // Rules flow down to files and sub-directories created later.
        var inherit = InheritanceFlags.ContainerInherit | InheritanceFlags.ObjectInherit;
        sec.AddAccessRule(new FileSystemAccessRule(owner,
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null),
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        sec.AddAccessRule(new FileSystemAccessRule(
            new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null),
            FileSystemRights.FullControl, inherit, PropagationFlags.None, AccessControlType.Allow));
        info.SetAccessControl(sec);
    }

    [SupportedOSPlatform("windows")]
    private static string? VerifyFileAclWindows(string path)
    {
        try
        {
            var info = new FileInfo(path);
            var sec = info.GetAccessControl();
            var ownerSid = sec.GetOwner(typeof(SecurityIdentifier)) as SecurityIdentifier;
            var current = WindowsIdentity.GetCurrent().User;
            if (current == null) return null;
            if (ownerSid == null || !ownerSid.Equals(current))
            {
                return $"MCP token file owner is {ownerSid?.Value ?? "<unknown>"}; expected current user {current.Value}. Treat the token as compromised and reset it.";
            }

            // Walk the ACL — anything granting read rights to a principal
            // outside {current user, SYSTEM, Administrators} is broader than
            // expected. The mask includes FullControl, so in practice any
            // Allow rule carrying a file-system right is inspected.
            var system = new SecurityIdentifier(WellKnownSidType.LocalSystemSid, null);
            var admins = new SecurityIdentifier(WellKnownSidType.BuiltinAdministratorsSid, null);
            var rules = sec.GetAccessRules(true, true, typeof(SecurityIdentifier));
            foreach (FileSystemAccessRule rule in rules)
            {
                if (rule.AccessControlType != AccessControlType.Allow) continue;
                if ((rule.FileSystemRights & (FileSystemRights.Read | FileSystemRights.ReadAndExecute | FileSystemRights.ReadData | FileSystemRights.FullControl | FileSystemRights.Modify)) == 0) continue;
                if (rule.IdentityReference is SecurityIdentifier sid &&
                    (sid.Equals(current) || sid.Equals(system) || sid.Equals(admins)))
                    continue;
                return $"MCP token file ACL grants read access to {rule.IdentityReference.Value}, broader than expected. Reset the token if this is unexpected.";
            }

            return null;
        }
        catch (Exception ex)
        {
            return $"MCP token ACL inspection failed: {ex.Message}";
        }
    }

    /// <summary>32 bytes (256 bits) of CSPRNG → base64url → 43 ASCII chars (no padding).</summary>
    private static string Generate()
    {
        Span<byte> raw = stackalloc byte[32];
        RandomNumberGenerator.Fill(raw);
        return Convert.ToBase64String(raw)
            .Replace('+', '-')
            .Replace('/', '_')
            .TrimEnd('=');
    }
}
|
||||
@ -1,479 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Net;
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
|
||||
/// Localhost-only HTTP transport for the MCP server.
|
||||
///
|
||||
/// Security model — three layers:
|
||||
/// 1. Loopback bind (127.0.0.1). Unreachable from another machine, regardless
|
||||
/// of firewall configuration.
|
||||
/// 2. Defensive IsLoopback check on every request.
|
||||
/// 3. Browser/CSRF gate: a browser tab fetching http://127.0.0.1:8765/ is
|
||||
/// *also* on the loopback interface, so loopback alone does not protect
|
||||
/// against a malicious page. We reject any request that:
|
||||
/// - presents an Origin header (real MCP clients do not send Origin),
|
||||
/// - has a Host header that is not 127.0.0.1/localhost,
|
||||
/// - is a POST with Content-Type other than application/json.
|
||||
/// Together these force a CORS preflight from a browser, which we never
|
||||
/// satisfy (no Access-Control-Allow-Origin), so the cross-origin call
|
||||
/// fails before reaching capability code.
|
||||
///
|
||||
/// Bearer-token auth in front of request dispatch. Required on every request
|
||||
/// when constructed with a non-null token (the tray always passes one — see
|
||||
/// <c>NodeService.McpTokenPath</c> / <c>McpAuthToken.LoadOrCreate</c>; legacy
|
||||
/// callers that pass null disable the check, kept for in-process tests). The
|
||||
/// token defends against untrusted local processes that could otherwise reach
|
||||
/// the predictable 127.0.0.1:port endpoint — a process running as the same
|
||||
/// user on the same box can read the token file and would defeat this layer,
|
||||
/// but anything sandboxed away from <c>%APPDATA%\OpenClawTray\</c> cannot.
|
||||
///
|
||||
/// Stability defenses (CR-003/CR-005):
|
||||
/// - Per-request hard deadline (RequestTimeoutMs) bounds body-read and
|
||||
/// bridge dispatch so a slow or hung client cannot pin a handler slot
|
||||
/// forever.
|
||||
/// - Active handler tasks are tracked so Stop/Dispose can drain in-flight
|
||||
/// work before tearing down the semaphore and capability services.
|
||||
/// </summary>
|
||||
public sealed class McpHttpServer : IDisposable
|
||||
{
|
||||
private const long MaxRequestBodyBytes = 4L * 1024 * 1024; // 4 MiB
|
||||
// 16 leaves headroom for parallel tool callers (e.g. an editor + Claude
|
||||
// Desktop + a CLI script) without making each connection cheap enough to
|
||||
// become a DoS lever — request size cap + per-handler timeout still bound
|
||||
// memory. Bumped from 8 after queue-stall reports under multi-IDE use.
|
||||
private const int MaxConcurrentHandlers = 16;
|
||||
// Sized to cover the longest legitimate capability: screen.record up to
|
||||
// 300s plus encoding + serialization. Earlier 90s deadline silently
|
||||
// aborted valid recording requests while the OS capture pipeline kept running
|
||||
// unobserved (unified review H10). Cancellation now flows through the
|
||||
// capability via INodeCapability.ExecuteAsync(NodeInvokeRequest, CT) so
|
||||
// tools that opt in actually stop the underlying work.
|
||||
private const int RequestTimeoutMs = 360_000;
|
||||
// How long Dispose waits for in-flight handlers to drain before forcing
|
||||
// tear-down. Past this point handlers may observe disposed services.
|
||||
private static readonly TimeSpan DrainTimeout = TimeSpan.FromSeconds(5);
|
||||
|
||||
private readonly McpToolBridge _bridge;
|
||||
private readonly int _port;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly HttpListener _listener;
|
||||
/// <summary>
|
||||
/// Required bearer token for HTTP requests. Empty/null disables auth (the
|
||||
/// pre-auth contract — kept so existing dev configs keep working). When set,
|
||||
/// every request must carry <c>Authorization: Bearer &lt;token&gt;</c>.
|
||||
/// </summary>
|
||||
private string? _authToken;
|
||||
private readonly CancellationTokenSource _cts = new();
|
||||
private readonly SemaphoreSlim _handlerLimiter = new(MaxConcurrentHandlers, MaxConcurrentHandlers);
|
||||
private readonly object _activeLock = new();
|
||||
private readonly HashSet<Task> _activeHandlers = new();
|
||||
private Task? _acceptLoop;
|
||||
private int _disposed;
|
||||
private int _stopping;
|
||||
|
||||
public int Port => _port;
|
||||
public string Endpoint => $"http://127.0.0.1:{_port}/";
|
||||
|
||||
/// <summary>
/// Creates a server that will listen on <c>http://127.0.0.1:{port}/</c>.
/// Nothing is bound until <see cref="Start"/> is called.
/// </summary>
/// <param name="bridge">Dispatcher for request bodies; required.</param>
/// <param name="port">TCP port for the loopback prefix.</param>
/// <param name="logger">Logger; required.</param>
/// <param name="authToken">Bearer token to require; null or empty disables the check.</param>
/// <exception cref="ArgumentNullException"><paramref name="bridge"/> or <paramref name="logger"/> is null.</exception>
public McpHttpServer(McpToolBridge bridge, int port, IOpenClawLogger logger, string? authToken = null)
{
    if (bridge is null)
    {
        throw new ArgumentNullException(nameof(bridge));
    }

    if (logger is null)
    {
        throw new ArgumentNullException(nameof(logger));
    }

    _bridge = bridge;
    _logger = logger;
    _port = port;
    _authToken = string.IsNullOrEmpty(authToken) ? null : authToken;

    // Loopback binding — not reachable from other machines. Use only the
    // numeric host on Windows so non-elevated startup does not require a
    // separate netsh http urlacl reservation for http://localhost:port/.
    var listener = new HttpListener();
    listener.Prefixes.Add($"http://127.0.0.1:{port}/");
    _listener = listener;
}
|
||||
|
||||
/// <summary>
/// Starts the listener and the background accept loop. Calling it while the
/// listener is already running does nothing.
/// </summary>
public void Start()
{
    if (!_listener.IsListening)
    {
        _listener.Start();
        _acceptLoop = Task.Run(() => AcceptLoopAsync(_cts.Token));
        _logger.Info($"[MCP] HTTP server listening on {Endpoint}");
    }
}
|
||||
|
||||
/// <summary>
/// Replaces the required bearer token. Null or empty disables the check.
/// The value is published with a volatile write.
/// </summary>
public void UpdateAuthToken(string? authToken)
{
    var normalized = string.IsNullOrEmpty(authToken) ? null : authToken;
    Volatile.Write(ref _authToken, normalized);
}
|
||||
|
||||
/// <summary>
/// Accepts connections until <paramref name="ct"/> is cancelled or the
/// listener stops. Each accepted request is checked for a loopback peer,
/// admitted through the concurrent-handler limiter, and then handed to
/// <see cref="RunHandlerAsync"/> on the thread pool.
/// </summary>
private async Task AcceptLoopAsync(CancellationToken ct)
{
    // Rejections sent from this loop are best-effort. Writing the response
    // can fail when the client has already gone away (HandleAsync guards the
    // same call for that reason); such a failure must not escape and
    // terminate the accept loop for every other client.
    void RejectQuietly(HttpListenerContext rejected, HttpStatusCode status, string reason)
    {
        try { Reject(rejected, status, reason); }
        catch { /* client gone or response already closed */ }
    }

    while (!ct.IsCancellationRequested && _listener.IsListening)
    {
        HttpListenerContext ctx;
        try
        {
            ctx = await _listener.GetContextAsync().ConfigureAwait(false);
        }
        catch (HttpListenerException) when (ct.IsCancellationRequested)
        {
            break;
        }
        catch (ObjectDisposedException)
        {
            break;
        }
        catch (Exception ex)
        {
            if (ct.IsCancellationRequested) break;
            _logger.Error("[MCP] Accept failed", ex);
            continue;
        }

        // Defensive: even though the prefix is loopback-only, double-check.
        if (!IPAddress.IsLoopback(ctx.Request.RemoteEndPoint.Address))
        {
            RejectQuietly(ctx, HttpStatusCode.Forbidden, "loopback only");
            continue;
        }

        // Cap concurrent handlers — a misbehaving local client can otherwise
        // pin every threadpool thread on long-running screen/camera calls.
        // Wait briefly: a slot freed during typical request handoff is well
        // under 50ms, so a small queue here turns transient spikes into
        // success rather than 503s without inviting unbounded queueing.
        bool slotAcquired;
        try
        {
            slotAcquired = await _handlerLimiter.WaitAsync(50, ct).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // Shutdown raced with an accepted request. Drop the connection
            // instead of leaving it unanswered, then let the cancellation
            // propagate exactly as it did before.
            try { ctx.Response.Abort(); } catch { /* already torn down */ }
            throw;
        }

        if (!slotAcquired)
        {
            RejectQuietly(ctx, (HttpStatusCode)503, "server busy");
            continue;
        }

        // NOTE: do not pass `ct` to Task.Run. If the token is cancelled
        // between WaitAsync returning and the delegate starting, Task.Run
        // skips the delegate and the finally never runs — leaking a
        // semaphore slot. Let the delegate observe cancellation itself.
        var handlerTask = Task.Run(() => RunHandlerAsync(ctx));
        TrackHandler(handlerTask);
    }
}
|
||||
|
||||
/// <summary>
/// Runs one request under a deadline and always hands the handler slot back
/// to the limiter afterwards.
/// </summary>
private async Task RunHandlerAsync(HttpListenerContext ctx)
{
    // Linked token: fires on server shutdown OR when the per-request deadline
    // elapses, so a wedged tool cannot hold the slot indefinitely.
    using (var deadlineCts = CancellationTokenSource.CreateLinkedTokenSource(_cts.Token))
    {
        deadlineCts.CancelAfter(RequestTimeoutMs);
        try
        {
            await HandleAsync(ctx, deadlineCts.Token).ConfigureAwait(false);
        }
        finally
        {
            ReleaseSlot();
        }
    }

    // A handler racing with shutdown may find the limiter already disposed.
    // Swallow that here; otherwise the exception would land in an unobserved
    // task and surface through global unhandled-exception handlers.
    void ReleaseSlot()
    {
        try
        {
            _handlerLimiter.Release();
        }
        catch (ObjectDisposedException)
        {
            /* server torn down */
        }
        catch (SemaphoreFullException)
        {
            /* defensive */
        }
    }
}
|
||||
|
||||
/// <summary>
/// Records <paramref name="task"/> in the active-handler set and arranges
/// for it to be removed again as soon as it completes.
/// </summary>
private void TrackHandler(Task task)
{
    lock (_activeLock)
    {
        _activeHandlers.Add(task);
    }

    void Untrack(Task finished)
    {
        lock (_activeLock)
        {
            _activeHandlers.Remove(finished);
        }
    }

    _ = task.ContinueWith(
        Untrack,
        CancellationToken.None,
        TaskContinuationOptions.ExecuteSynchronously,
        TaskScheduler.Default);
}
|
||||
|
||||
/// <summary>
/// Validates and serves a single request. Checks run in a fixed order —
/// Origin, other browser-context headers, Host, bearer token, HTTP method,
/// Content-Type, body size — and the first failure answers with the
/// matching status code. A request that passes them all has its body read
/// and forwarded to the bridge; the bridge's reply is returned as JSON, or
/// 204 when the bridge returns null.
/// </summary>
private async Task HandleAsync(HttpListenerContext ctx, CancellationToken ct)
{
    // Read the token exactly once: UpdateAuthToken may rotate the field on
    // another thread, and a single request must not see two different values
    // between the null test and the comparison.
    var expectedToken = Volatile.Read(ref _authToken);
    try
    {
        var request = ctx.Request;

        // CSRF/browser gate — real MCP HTTP clients (Claude Desktop, Cursor,
        // Claude Code, curl) do not set Origin; a browser fetch always does.
        if (!string.IsNullOrEmpty(request.Headers["Origin"]))
        {
            Reject(ctx, HttpStatusCode.Forbidden, "origin not allowed");
            return;
        }

        // Belt-and-suspenders: Origin may be stripped (e.g. by a privacy
        // extension) while these headers still reveal a browser context.
        // Native MCP clients don't emit them.
        foreach (var browserHeader in new[] { "Sec-Fetch-Site", "Sec-Fetch-Mode", "Referer" })
        {
            if (!string.IsNullOrEmpty(request.Headers[browserHeader]))
            {
                Reject(ctx, HttpStatusCode.Forbidden, "browser context not allowed");
                return;
            }
        }

        // Host must match the loopback bind. Defends against DNS rebinding
        // pivots that route a public hostname to 127.0.0.1.
        if (!IsHostAllowed(request.Headers["Host"]))
        {
            Reject(ctx, HttpStatusCode.Forbidden, "host not allowed");
            return;
        }

        // Bearer-token gate against untrusted local processes that share the
        // loopback surface with the legitimate client. It sits before method
        // dispatch so alternate verbs cannot bypass it.
        if (expectedToken != null && !IsAuthorized(expectedToken, request.Headers["Authorization"]))
        {
            Reject(ctx, HttpStatusCode.Unauthorized, "missing or invalid bearer token");
            return;
        }

        switch (request.HttpMethod)
        {
            case "POST":
                break;

            case "GET":
                // Friendly probe response — confirms the server is up without
                // hitting the JSON-RPC endpoint.
                WriteText(ctx.Response, HttpStatusCode.OK,
                    $"OpenClaw MCP server. POST JSON-RPC to {Endpoint}", "text/plain");
                return;

            default:
                Reject(ctx, HttpStatusCode.MethodNotAllowed, "POST only");
                return;
        }

        // Force application/json on POST. Together with the Origin check this
        // makes a browser cross-origin fetch use a non-simple Content-Type and
        // trigger a CORS preflight, which is never honored.
        var mediaType = (request.ContentType ?? "").Split(';')[0].Trim();
        if (!string.Equals(mediaType, "application/json", StringComparison.OrdinalIgnoreCase))
        {
            Reject(ctx, HttpStatusCode.UnsupportedMediaType, "application/json required");
            return;
        }

        // Refuse oversized bodies *before* reading them.
        if (request.ContentLength64 > MaxRequestBodyBytes)
        {
            Reject(ctx, HttpStatusCode.RequestEntityTooLarge, "request body too large");
            return;
        }

        string requestBody;
        try
        {
            requestBody = await ReadBodyAsync(request, MaxRequestBodyBytes, ct).ConfigureAwait(false);
        }
        catch (InvalidDataException)
        {
            Reject(ctx, HttpStatusCode.RequestEntityTooLarge, "request body too large");
            return;
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // Slow-body or stuck client — free the slot rather than blocking forever.
            Reject(ctx, HttpStatusCode.RequestTimeout, "request timed out");
            return;
        }

        string? reply;
        try
        {
            reply = await _bridge.HandleRequestAsync(requestBody, ct).ConfigureAwait(false);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            Reject(ctx, HttpStatusCode.RequestTimeout, "request timed out");
            return;
        }

        if (reply != null)
        {
            WriteText(ctx.Response, HttpStatusCode.OK, reply, "application/json");
            return;
        }

        // Notification — JSON-RPC says no body. 204 is the most honest signal.
        ctx.Response.StatusCode = (int)HttpStatusCode.NoContent;
        ctx.Response.Close();
    }
    catch (Exception ex)
    {
        _logger.Error("[MCP] Request failed", ex);
        // Response may already be partially written or closed; swallow.
        try
        {
            Reject(ctx, HttpStatusCode.InternalServerError, "internal error");
        }
        catch
        {
            /* response already disposed */
        }
    }
}
|
||||
|
||||
/// <summary>
/// Checks an <c>Authorization</c> header value against the expected bearer token.
/// </summary>
/// <param name="authToken">The token the server expects. An empty or null token never authorizes.</param>
/// <param name="authHeader">The raw header value, expected in the form <c>Bearer &lt;token&gt;</c>.</param>
/// <returns><c>true</c> only when the header uses the Bearer scheme and carries exactly the expected token.</returns>
private static bool IsAuthorized(string authToken, string? authHeader)
{
    // Fail closed: with an empty expected token, a header consisting only of the
    // scheme ("Bearer " + whitespace) would otherwise compare equal and be accepted.
    if (string.IsNullOrEmpty(authToken) || string.IsNullOrEmpty(authHeader)) return false;

    // Accept "Bearer <token>" (RFC 6750) — case-insensitive scheme, exact token.
    const string scheme = "Bearer ";
    if (!authHeader.StartsWith(scheme, StringComparison.OrdinalIgnoreCase)) return false;

    var presented = authHeader.Substring(scheme.Length).Trim();
    if (presented.Length != authToken.Length) return false;

    // Fixed-time compare so the position of the first mismatching byte is not
    // observable through timing.
    return CryptographicOperations.FixedTimeEquals(
        Encoding.UTF8.GetBytes(presented),
        Encoding.UTF8.GetBytes(authToken));
}
|
||||
|
||||
/// <summary>
/// Decides whether a <c>Host</c> header value names the local loopback interface.
/// </summary>
/// <param name="host">Raw header value, optionally carrying a <c>:port</c> suffix.</param>
/// <returns>
/// <c>true</c> for <c>127.0.0.1</c>, <c>localhost</c> (any casing), and <c>::1</c>
/// (bare or bracketed), each with or without a port; <c>false</c> otherwise.
/// </returns>
private static bool IsHostAllowed(string? host)
{
    if (string.IsNullOrEmpty(host)) return false;
    var trimmed = host.Trim();

    // IPv6 form: [::1] or [::1]:port.
    if (trimmed.StartsWith('['))
    {
        var closeBracket = trimmed.IndexOf(']');
        if (closeBracket < 0) return false;

        var v6 = trimmed.Substring(1, closeBracket - 1);
        if (!string.Equals(v6, "::1", StringComparison.Ordinal)) return false;

        // Whatever follows the bracket must be nothing or a port; otherwise a value
        // such as "[::1].evil.example" would pass on its prefix alone.
        return IsEmptyOrPort(trimmed.Substring(closeBracket + 1));
    }

    // Bare IPv6 loopback. This must be checked before port stripping: the last ':'
    // of "::1" would be taken as a port separator and leave ":" as the hostname.
    if (string.Equals(trimmed, "::1", StringComparison.Ordinal)) return true;

    // IPv4 / hostname: strip trailing :port if present.
    var colon = trimmed.LastIndexOf(':');
    var hostname = (colon > 0 ? trimmed.Substring(0, colon) : trimmed).Trim();
    return string.Equals(hostname, "127.0.0.1", StringComparison.Ordinal)
        || string.Equals(hostname, "::1", StringComparison.Ordinal)
        || string.Equals(hostname, "localhost", StringComparison.OrdinalIgnoreCase);

    // True for "" or ":" followed only by ASCII digits.
    static bool IsEmptyOrPort(string suffix)
    {
        if (suffix.Length == 0) return true;
        if (suffix[0] != ':') return false;
        for (var i = 1; i < suffix.Length; i++)
        {
            if (suffix[i] < '0' || suffix[i] > '9') return false;
        }
        return true;
    }
}
|
||||
|
||||
/// <summary>
/// Reads the whole request body as text, refusing to buffer more than
/// <paramref name="maxBytes"/> bytes.
/// </summary>
/// <param name="request">Request whose input stream is drained.</param>
/// <param name="maxBytes">Largest body size accepted, in bytes.</param>
/// <param name="ct">Cancels the read.</param>
/// <returns>The body decoded with the request's content encoding (UTF-8 when none is reported).</returns>
/// <exception cref="InvalidDataException">More than <paramref name="maxBytes"/> bytes were received.</exception>
private static async Task<string> ReadBodyAsync(HttpListenerRequest request, long maxBytes, CancellationToken ct)
{
    // The limit is enforced on the bytes actually received rather than on the
    // declared Content-Length, which a client may omit or misreport.
    var textEncoding = request.ContentEncoding ?? Encoding.UTF8;

    // Rent the scratch buffer from the shared pool instead of allocating one per request.
    var pool = System.Buffers.ArrayPool<byte>.Shared;
    var chunk = pool.Rent(8192);
    try
    {
        using var collected = new MemoryStream();
        long received = 0;
        int read;
        while ((read = await request.InputStream.ReadAsync(chunk.AsMemory(0, chunk.Length), ct).ConfigureAwait(false)) > 0)
        {
            received += read;
            if (received > maxBytes)
            {
                throw new InvalidDataException("request body exceeds cap");
            }

            collected.Write(chunk, 0, read);
        }

        return textEncoding.GetString(collected.GetBuffer(), 0, (int)collected.Length);
    }
    finally
    {
        pool.Return(chunk);
    }
}
|
||||
|
||||
/// <summary>
/// Sends a plain-text response with the given status and reason. Any failure
/// while writing is ignored.
/// </summary>
private static void Reject(HttpListenerContext ctx, HttpStatusCode status, string reason)
{
    try
    {
        WriteText(ctx.Response, status, reason, "text/plain");
    }
    catch
    {
        // Best effort: the response may already be disposed.
    }
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="body"/> as a UTF-8 response with the given status code
/// and content type, then disposes the output stream.
/// </summary>
private static void WriteText(HttpListenerResponse response, HttpStatusCode status, string body, string contentType)
{
    var payload = Encoding.UTF8.GetBytes(body);

    response.StatusCode = (int)status;
    response.ContentType = contentType;
    response.ContentLength64 = payload.Length;

    using (var sink = response.OutputStream)
    {
        sink.Write(payload, 0, payload.Length);
    }
}
|
||||
|
||||
/// <summary>
/// Begins shutdown: the first caller runs the stop-and-drain sequence with the
/// given timeout; every later caller gets an already-completed task.
/// </summary>
/// <param name="drainTimeout">Longest time to wait for active handlers to finish.</param>
public Task StopAsync(TimeSpan drainTimeout)
{
    // The gate is _stopping rather than _disposed, so Dispose can set
    // _disposed first and still claim the drain for itself.
    var alreadyStopping = Interlocked.Exchange(ref _stopping, 1) != 0;
    return alreadyStopping ? Task.CompletedTask : StopCoreAsync(drainTimeout);
}
|
||||
|
||||
/// <summary>
/// Cancels in-flight work, stops the listener, then waits for the handlers that
/// were active at that moment to finish or for <paramref name="drainTimeout"/>
/// to elapse. Finally gives the accept loop up to one second to exit.
/// </summary>
private async Task StopCoreAsync(TimeSpan drainTimeout)
{
    try
    {
        _cts.Cancel();
    }
    catch
    {
        /* already cancelled or disposed */
    }

    try
    {
        if (_listener.IsListening)
        {
            _listener.Stop();
        }
    }
    catch
    {
        /* already stopped */
    }

    // Copy the active set while holding the lock so the wait below works on a
    // stable array even as handlers deregister themselves.
    Task[] snapshot;
    lock (_activeLock)
    {
        snapshot = new Task[_activeHandlers.Count];
        _activeHandlers.CopyTo(snapshot);
    }

    var drained = Task.WhenAll(snapshot);
    var timeout = Task.Delay(drainTimeout);
    var first = await Task.WhenAny(drained, timeout).ConfigureAwait(false);

    var timedOut = first == timeout;
    if (timedOut && snapshot.Length > 0)
    {
        int remaining;
        lock (_activeLock)
        {
            remaining = _activeHandlers.Count;
        }

        _logger.Warn($"[MCP] Drain timeout ({drainTimeout.TotalSeconds:F1}s); {remaining} handler(s) still running");
    }

    if (_acceptLoop == null)
    {
        return;
    }

    try
    {
        await Task.WhenAny(_acceptLoop, Task.Delay(TimeSpan.FromSeconds(1))).ConfigureAwait(false);
    }
    catch
    {
        /* loop may have errored */
    }
}
|
||||
|
||||
/// <summary>
/// Stops the server (draining active handlers for up to <c>DrainTimeout</c>) and
/// releases the listener, the cancellation source and the handler limiter.
/// Only the first call does any work.
/// </summary>
public void Dispose()
{
    if (Interlocked.Exchange(ref _disposed, 1) != 0) return;

    // Run the drain unconditionally on dispose. We can't go through the
    // public StopAsync because a prior caller may already have set
    // _stopping — we still need to wait for the drain to finish before
    // tearing down the limiter and CTS.
    if (Interlocked.Exchange(ref _stopping, 1) == 0)
    {
        try { StopCoreAsync(DrainTimeout).GetAwaiter().GetResult(); }
        catch (Exception ex) { _logger.Warn($"[MCP] Drain error: {ex.Message}"); }
    }
    else
    {
        // A prior StopAsync is in flight; wait briefly for it to finish so
        // we don't dispose the limiter while a handler is still inside it.
        //
        // Snapshot under the lock, then wait OUTSIDE it. Blocking while holding
        // _activeLock would stall every other user of the lock (including
        // handlers updating the active set) for the whole wait.
        Task[] toAwait;
        lock (_activeLock)
        {
            toAwait = new Task[_activeHandlers.Count];
            _activeHandlers.CopyTo(toAwait);
        }

        if (toAwait.Length > 0)
        {
            try { Task.WhenAny(Task.WhenAll(toAwait), Task.Delay(DrainTimeout)).GetAwaiter().GetResult(); }
            catch { /* swallow — best-effort */ }
        }
    }

    try { _listener.Close(); } catch { /* already closed */ }
    _cts.Dispose();
    _handlerLimiter.Dispose();
}
|
||||
}
|
||||
@ -1,436 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared.Mcp;
|
||||
|
||||
/// <summary>
|
||||
/// Transport-agnostic MCP server core. Auto-discovers tools from the live
|
||||
/// <see cref="INodeCapability"/> registry — registering a new capability on
|
||||
/// the node client immediately exposes its commands as MCP tools.
|
||||
/// </summary>
|
||||
public class McpToolBridge
|
||||
{
|
||||
private const string ProtocolVersion = "2024-11-05";
|
||||
|
||||
private readonly Func<IReadOnlyList<INodeCapability>> _capabilityProvider;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly string _serverName;
|
||||
private readonly string _serverVersion;
|
||||
|
||||
private static readonly JsonSerializerOptions PayloadJsonOptions = new()
|
||||
{
|
||||
WriteIndented = false,
|
||||
};
|
||||
|
||||
/// <summary>
/// Creates a bridge over the given capability source.
/// </summary>
/// <param name="capabilityProvider">Returns the current capability list; invoked per request.</param>
/// <param name="logger">Log sink; <c>NullLogger.Instance</c> is used when omitted.</param>
/// <param name="serverName">Name reported in the <c>initialize</c> response.</param>
/// <param name="serverVersion">Version reported in the <c>initialize</c> response.</param>
/// <exception cref="ArgumentNullException"><paramref name="capabilityProvider"/> is null.</exception>
public McpToolBridge(
    Func<IReadOnlyList<INodeCapability>> capabilityProvider,
    IOpenClawLogger? logger = null,
    string serverName = "openclaw-tray-mcp",
    string serverVersion = "0.0.0")
{
    if (capabilityProvider == null)
    {
        throw new ArgumentNullException(nameof(capabilityProvider));
    }

    _capabilityProvider = capabilityProvider;
    _logger = logger ?? NullLogger.Instance;
    _serverName = serverName;
    _serverVersion = serverVersion;
}
|
||||
|
||||
/// <summary>
/// Handles one JSON-RPC request body without a deadline by forwarding to the
/// cancellable overload with <see cref="CancellationToken.None"/>.
/// </summary>
/// <returns>The response body, or null when the request is a notification.</returns>
public Task<string?> HandleRequestAsync(string requestBody)
{
    return HandleRequestAsync(requestBody, CancellationToken.None);
}
|
||||
|
||||
/// <summary>
/// Dispatch a JSON-RPC request body, observing a cancellation token (used
/// by the HTTP transport to enforce a per-request deadline). When the
/// token fires during a tool dispatch, the call surfaces as a tool error
/// ("request timed out") so the slot is freed even if the underlying
/// capability work continues to run.
/// </summary>
/// <param name="requestBody">Raw JSON-RPC request text.</param>
/// <param name="cancellationToken">Deadline / cancellation for tool dispatch.</param>
/// <returns>
/// The serialized JSON-RPC response, or null when the request carries no
/// non-null <c>id</c> (a notification) and was parsed as an object.
/// </returns>
public async Task<string?> HandleRequestAsync(string requestBody, CancellationToken cancellationToken)
{
    // JsonDocument.Parse(null) throws ArgumentNullException, which the JsonException
    // handler below would not catch; answer with a JSON-RPC parse error instead.
    if (requestBody is null)
    {
        return WriteError(null, JsonRpcErrorCode.ParseError, "Parse error: request body is null");
    }

    JsonDocument doc;
    try
    {
        doc = JsonDocument.Parse(requestBody);
    }
    catch (JsonException ex)
    {
        return WriteError(null, JsonRpcErrorCode.ParseError, $"Parse error: {ex.Message}");
    }

    using (doc)
    {
        var root = doc.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
            return WriteError(null, JsonRpcErrorCode.InvalidRequest, "Request must be a JSON object");

        // A missing or null id marks a notification: it is processed but never answered.
        var idElement = root.TryGetProperty("id", out var idProp) ? idProp : (JsonElement?)null;
        var hasId = idElement.HasValue && idElement.Value.ValueKind != JsonValueKind.Null;

        if (!root.TryGetProperty("method", out var methodProp) || methodProp.ValueKind != JsonValueKind.String)
        {
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.InvalidRequest, "Missing 'method'")
                : null;
        }

        var method = methodProp.GetString()!;
        var paramsElement = root.TryGetProperty("params", out var p) ? p : default;

        try
        {
            object? result = method switch
            {
                "initialize" => HandleInitialize(),
                "ping" => new { },
                "notifications/initialized" => null,
                "tools/list" => HandleToolsList(),
                // ConfigureAwait(false): nothing after the await needs the caller's
                // synchronization context — only serialization follows.
                "tools/call" => await HandleToolsCallAsync(paramsElement, cancellationToken).ConfigureAwait(false),
                // Some clients (notably Cursor) probe these on startup. Returning
                // empty lists is friendlier than MethodNotFound — both feature sets
                // are deferred but compatible by being absent rather than failing.
                "resources/list" => new { resources = Array.Empty<object>() },
                "prompts/list" => new { prompts = Array.Empty<object>() },
                _ => throw new McpMethodNotFoundException(method),
            };

            if (!hasId) return null; // notification — no response
            return WriteResult(idElement, result ?? new { });
        }
        catch (McpMethodNotFoundException ex)
        {
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.MethodNotFound, ex.Message)
                : null;
        }
        catch (McpToolException ex)
        {
            return hasId
                ? WriteToolError(idElement, ex.Message)
                : null;
        }
        catch (Exception ex)
        {
            // Full exception with stack goes to the log; the wire response
            // gets a generic message so we don't leak internals to clients.
            _logger.Error($"[MCP] Handler error for {method}", ex);
            return hasId
                ? WriteError(idElement, JsonRpcErrorCode.InternalError, "internal error")
                : null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds the <c>initialize</c> result: protocol version, the advertised
/// capabilities (tools, with <c>listChanged</c> false) and the server name/version.
/// </summary>
private object HandleInitialize()
{
    var capabilities = new
    {
        tools = new { listChanged = false },
    };
    var serverInfo = new
    {
        name = _serverName,
        version = _serverVersion,
    };

    return new
    {
        protocolVersion = ProtocolVersion,
        capabilities,
        serverInfo,
    };
}
|
||||
|
||||
/// <summary>
/// Builds the <c>tools/list</c> result: one tool entry per command of every
/// capability returned by the provider. Commands missing from
/// <c>CommandDescriptions</c> get a generic "{category} capability: {cmd}" description.
/// </summary>
private object HandleToolsList()
{
    var capabilities = _capabilityProvider();
    var tools = new List<object>();

    foreach (var capability in capabilities)
    {
        foreach (var command in capability.Commands)
        {
            if (!CommandDescriptions.TryGetValue(command, out var description))
            {
                description = $"{capability.Category} capability: {command}";
            }

            // Every tool advertises the same open object schema.
            var inputSchema = new
            {
                type = "object",
                additionalProperties = true,
                properties = new { },
            };

            tools.Add(new
            {
                name = command,
                description,
                inputSchema,
            });
        }
    }

    return new { tools };
}
|
||||
|
||||
/// <summary>
|
||||
/// The complete set of commands documented in <see cref="CommandDescriptions"/>.
|
||||
/// Exposed as a stable surface so out-of-process documentation (winnode's
|
||||
/// skill.md) can be drift-tested against the canonical capability surface.
|
||||
/// </summary>
|
||||
public static IReadOnlyCollection<string> KnownCommands => CommandDescriptions.Keys;
|
||||
|
||||
/// <summary>
|
||||
/// Per-command descriptions advertised via <c>tools/list</c>. Sourced from
|
||||
/// the OpenClaw docs (docs/nodes/index.md, docs/platforms/mac/canvas.md) and
|
||||
/// the capability implementations under <c>OpenClaw.Shared.Capabilities</c>.
|
||||
/// Unknown commands fall back to a generic <c>{category} capability: {cmd}</c>
|
||||
/// label so newly-added capabilities still render before this table is updated.
|
||||
/// </summary>
|
||||
private static readonly Dictionary<string, string> CommandDescriptions = new(StringComparer.Ordinal)
|
||||
{
|
||||
// system.*
|
||||
["system.notify"] =
|
||||
"Show a Windows toast notification on the node. Args: title (string, default 'OpenClaw'), body (string), subtitle (string), sound (bool, default true). Returns { sent: true }.",
|
||||
["system.run"] =
|
||||
"Execute a shell command on the Windows node host. Args: command (string or string[] argv, required), args (string[]), shell (string), cwd (string), timeoutMs (int, default 30000), env (object). Subject to the local exec approval policy. Returns { stdout, stderr, exitCode, timedOut, durationMs }.",
|
||||
["system.run.prepare"] =
|
||||
"Pre-flight a system.run invocation: returns the parsed execution plan (argv, cwd, rawCommand, agentId, sessionKey) without running anything. The gateway uses this to build its approval context before the actual run.",
|
||||
["system.which"] =
|
||||
"Resolve executable names to absolute paths by searching PATH (PATHEXT-aware on Windows). Args: bins (string[], required). Returns { bins: { name: resolvedPath, ... } } including only names that were found.",
|
||||
["system.execApprovals.get"] =
|
||||
"Return the current exec approval policy: { enabled, defaultAction ('allow'|'deny'|'prompt'), rules: [{ pattern, action, shells, description, enabled }, ...] }.",
|
||||
["system.execApprovals.set"] =
|
||||
"Replace the exec approval policy. Args: rules (array of { pattern, action, shells?, description?, enabled? }), defaultAction (string, optional). Persisted to disk; used by future system.run calls.",
|
||||
|
||||
// canvas.* — agent-controlled WebView2 panel for HTML/CSS/JS, A2UI, and small interactive UI surfaces.
|
||||
["canvas.present"] =
|
||||
"Show the agent-controlled Canvas window (WebView2). Args: url (string) or html (string), width (int, default 800), height (int, default 600), x/y (int, -1 = center), title (string, default 'Canvas'), alwaysOnTop (bool, default false). The Canvas is a lightweight visual workspace for HTML/CSS/JS, A2UI, and small interactive UI surfaces.",
|
||||
["canvas.hide"] =
|
||||
"Hide the Canvas window without destroying its state.",
|
||||
["canvas.navigate"] =
|
||||
"Navigate the existing Canvas to a new location. Args: url (string, required) — accepts http(s), file://, or local canvas paths.",
|
||||
["canvas.eval"] =
|
||||
"Evaluate a JavaScript expression inside the Canvas WebView and return its result. Args: script | javaScript | javascript (string, required).",
|
||||
["canvas.snapshot"] =
|
||||
"Capture the Canvas viewport as a base64-encoded image. Args: format ('png'|'jpeg', default 'png'), maxWidth (int, default 1200), quality (int 1-100, default 80). Returns { format, base64 }.",
|
||||
["canvas.a2ui.push"] =
|
||||
"Push A2UI v0.8 server→client messages to the Canvas as JSONL. Supported message kinds: beginRendering, surfaceUpdate, dataModelUpdate, deleteSurface (createSurface / v0.9 is rejected). Args: jsonl (string) or jsonlPath (string, must live under the system temp directory), props (object, optional).",
|
||||
["canvas.a2ui.reset"] =
|
||||
"Reset the Canvas A2UI state, clearing any rendered surfaces.",
|
||||
["canvas.a2ui.dump"] =
|
||||
"READ-ALL: Return the full state of every currently-rendered A2UI surface — the component tree, every data-model entry, and any registered secret paths (values redacted). Operators granting MCP access should treat this as equivalent to a screenshot of every open surface, not a normal observability tool.",
|
||||
["canvas.caps"] =
|
||||
"Report the A2UI feature flags this canvas runtime supports (component catalog, max surfaces, render depth, value-size caps). Diagnostic; no side effects.",
|
||||
["canvas.a2ui.pushJSONL"] =
|
||||
"Streaming variant of canvas.a2ui.push for very large surfaces. Same protocol contract; jsonlPath argument must live under the system temp directory and is opened via FileStream + GetFinalPathNameByHandle to defeat reparse-point traversal.",
|
||||
|
||||
// screen.* — names match the canonical OpenClaw protocol
|
||||
// (apps/shared/OpenClawKit/Sources/OpenClawKit/ScreenCommands.swift).
|
||||
// No screen.list or screen.capture exist in the protocol; previous
|
||||
// drift advertised tools that didn't actually resolve.
|
||||
["screen.snapshot"] =
|
||||
"Capture a screenshot of the specified display. Args: format ('png'|'jpeg', default 'png'), maxWidth (int, default 1920), quality (int 1-100, default 80), monitor / screenIndex (int, default 0 = primary), includePointer (bool, default true). Returns { format, width, height, base64, image } where image is a data: URL.",
|
||||
["screen.record"] =
|
||||
"Record the specified display for a bounded duration. Args: durationMs (int, required, max 300000), format ('mp4'|'webm', default 'mp4'), monitor / screenIndex (int, default 0 = primary), maxWidth (int, default 1920), fps (int, default 30). Returns { format, durationMs, base64 }.",
|
||||
|
||||
// camera.*
|
||||
["camera.list"] =
|
||||
"List cameras attached to the Windows node. Returns { cameras: [{ deviceId, name, isDefault }, ...] }.",
|
||||
["camera.snap"] =
|
||||
"Capture a still photo from a camera. Args: deviceId (string, optional — defaults to system default camera), format ('jpeg'|'png', default 'jpeg'), maxWidth (int, default 1280), quality (int 1-100, default 80). Returns { format, width, height, base64 }.",
|
||||
["camera.clip"] =
|
||||
"Record a short clip from a camera. Args: deviceId (string, optional), durationMs (int, required, max 60000), format ('mp4'|'webm', default 'mp4'), maxWidth (int, default 1280). Returns { format, durationMs, base64 }.",
|
||||
|
||||
// stt.* — microphone capture → text. Default-off; privacy-sensitive.
|
||||
// Single engine: Whisper.net runs locally on the device.
|
||||
["stt.transcribe"] =
|
||||
"Capture microphone audio for a bounded duration and return the transcribed text. Args: maxDurationMs (int, required, > 0, max 30000), language (string, optional BCP-47 tag like 'en-US' or 'auto' — falls back to the configured SttLanguage setting). Returns { transcribed, text, durationMs, language, engineEffective ('whisper') }. Whisper model is downloaded on first use; until then this returns an error pointing to Voice Settings. Requires NodeSttEnabled.",
|
||||
["stt.listen"] =
|
||||
"Capture microphone audio with voice-activity detection and return when the user stops speaking, or after timeoutMs. Args: timeoutMs (int, optional, default 30000, range 1000..120000), language (string, optional BCP-47 tag or 'auto', default 'auto'). Returns { text, language, durationMs, segments[{ text, startMs, endMs }], engineEffective ('whisper') }. Result is the full silence-bounded utterance (all Whisper segments concatenated), not a partial first segment. Requires NodeSttEnabled.",
|
||||
["stt.status"] =
|
||||
"Report STT engine readiness. No args. Returns { engine ('whisper'), readiness ('ready'|'initializing'|'model-downloading'|'model-not-downloaded'|'unavailable'), modelDownloadProgress (0..1 or null), isListenWithVadSupported (bool), isBoundedTranscribeSupported (bool) }. Carries no PII (no transcript history, no language history, no device IDs, no model paths).",
|
||||
|
||||
// tts.*
|
||||
["tts.speak"] =
|
||||
"Speak text aloud on the Windows node. Args: text (string, required), provider ('piper'|'windows'|'elevenlabs', optional — falls back to the configured TtsProvider setting, default 'piper' for fresh installs), voiceId (string, optional — overrides the per-provider configured voice), model (string, optional, ElevenLabs only), interrupt (bool, default false — interrupts any in-progress playback). Returns { spoken, provider, contentType, durationMs }.",
|
||||
|
||||
// app.*
|
||||
["app.navigate"] =
|
||||
"Navigate the companion app to a specific page (e.g., 'home', 'sessions', 'settings'). Args: page (string, required). Returns { navigated, page }.",
|
||||
["app.status"] =
|
||||
"Get current connection status, node state, and gateway info. Returns { connectionStatus, nodeConnected, nodePaired, nodePendingApproval, gatewayVersion, sessionCount, nodeCount }.",
|
||||
["app.sessions"] =
|
||||
"List active sessions with optional agent filter. Args: agentId (string, optional). Returns array of { Key, Status, Model, AgeText, tokens }.",
|
||||
["app.agents"] =
|
||||
"List agents from the connected gateway. Returns the raw agents JSON array.",
|
||||
["app.nodes"] =
|
||||
"List connected nodes and their capabilities. Returns array of { DisplayName, NodeId, IsOnline, Platform, CapabilityCount }.",
|
||||
["app.config.get"] =
|
||||
"Read gateway configuration value at a dot-path. Args: path (string, optional). Returns the config subtree or full config.",
|
||||
["app.settings.get"] =
|
||||
"Read a local app setting by name. Args: name (string, required). Returns the setting value.",
|
||||
["app.settings.set"] =
|
||||
"Set a local app setting (name and value). Args: name (string, required), value (string, required). Returns { name, value }.",
|
||||
["app.menu"] =
|
||||
"Get tray menu state (status, session count, node count). Returns array of menu items.",
|
||||
["app.search"] =
|
||||
"Search the command palette and return matching commands. Args: query (string, required). Returns array of { Title, Subtitle, Icon }.",
|
||||
};
|
||||
|
||||
/// <summary>
/// Executes a <c>tools/call</c> request: validates the params, finds the first
/// capability that can handle the named tool, runs it, and wraps the serialized
/// payload in a single text content item.
/// </summary>
/// <param name="parameters">The request's <c>params</c> element.</param>
/// <param name="cancellationToken">Deadline for the call.</param>
/// <returns>An object with a <c>content</c> array and <c>isError = false</c>.</returns>
/// <exception cref="McpToolException">
/// Invalid params, unknown tool, a timed-out call, or a capability response
/// whose <c>Ok</c> flag is false.
/// </exception>
private async Task<object> HandleToolsCallAsync(JsonElement parameters, CancellationToken cancellationToken)
{
    if (parameters.ValueKind != JsonValueKind.Object)
        throw new McpToolException("Invalid params: expected object");

    if (!parameters.TryGetProperty("name", out var nameProp) || nameProp.ValueKind != JsonValueKind.String)
        throw new McpToolException("Missing 'name'");

    var name = nameProp.GetString()!;
    if (string.IsNullOrWhiteSpace(name))
        throw new McpToolException("Empty tool name");

    var args = parameters.TryGetProperty("arguments", out var argsProp) ? argsProp : default;
    if (args.ValueKind != JsonValueKind.Undefined
        && args.ValueKind != JsonValueKind.Null
        && args.ValueKind != JsonValueKind.Object)
    {
        throw new McpToolException("'arguments' must be a JSON object if present");
    }

    var caps = _capabilityProvider();
    INodeCapability? capability = null;
    foreach (var c in caps)
    {
        if (!c.CanHandle(name)) continue;
        capability = c;
        break;
    }
    if (capability == null)
        throw new McpToolException($"Unknown tool: {name}");

    var request = new NodeInvokeRequest
    {
        Id = Guid.NewGuid().ToString(),
        Command = name,
        // Clone so the arguments stay readable after the caller disposes the
        // JsonDocument they came from: on timeout this method returns while the
        // capability may still be running. default(JsonElement) cannot be cloned,
        // so an absent "arguments" is passed through unchanged.
        Args = args.ValueKind == JsonValueKind.Undefined ? args : args.Clone(),
    };

    _logger.Debug($"[MCP] tools/call {name}");
    // Pass the cancellation token through. Capabilities that override the
    // CT-aware overload (long-running screen/camera capture) will stop
    // their underlying pipeline on timeout; legacy capabilities fall back
    // to the no-CT signature and still benefit from WaitAsync freeing the
    // bridge's handler slot.
    NodeInvokeResponse response;
    try
    {
        response = await capability.ExecuteAsync(request, cancellationToken)
            .WaitAsync(cancellationToken)
            .ConfigureAwait(false);
    }
    catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
    {
        _logger.Warn($"[MCP] tools/call {name} timed out");
        throw new McpToolException("request timed out");
    }

    if (!response.Ok)
        throw new McpToolException(response.Error ?? "tool execution failed");

    var payloadJson = response.Payload is null
        ? "null"
        : JsonSerializer.Serialize(response.Payload, PayloadJsonOptions);

    return new
    {
        content = new[]
        {
            new { type = "text", text = payloadJson },
        },
        isError = false,
    };
}
|
||||
|
||||
/// <summary>
/// Serializes a JSON-RPC 2.0 success envelope: <c>jsonrpc</c>, <c>id</c>, and
/// <paramref name="result"/> under <c>result</c>.
/// </summary>
private static string WriteResult(JsonElement? id, object result)
{
    using var buffer = new MemoryStream();
    using var writer = new Utf8JsonWriter(buffer);

    writer.WriteStartObject();
    writer.WriteString("jsonrpc", "2.0");
    WriteId(writer, id);
    writer.WritePropertyName("result");
    JsonSerializer.Serialize(writer, result, PayloadJsonOptions);
    writer.WriteEndObject();

    // Push everything into the stream before reading it back.
    writer.Flush();
    return System.Text.Encoding.UTF8.GetString(buffer.GetBuffer(), 0, (int)buffer.Length);
}
|
||||
|
||||
/// <summary>
/// Serializes a JSON-RPC 2.0 error envelope: <c>jsonrpc</c>, <c>id</c>, and an
/// <c>error</c> object holding <paramref name="code"/> and <paramref name="message"/>.
/// </summary>
private static string WriteError(JsonElement? id, int code, string message)
{
    using var buffer = new MemoryStream();
    using var writer = new Utf8JsonWriter(buffer);

    writer.WriteStartObject();
    writer.WriteString("jsonrpc", "2.0");
    WriteId(writer, id);

    writer.WriteStartObject("error");
    writer.WriteNumber("code", code);
    writer.WriteString("message", message);
    writer.WriteEndObject();

    writer.WriteEndObject();

    // Push everything into the stream before reading it back.
    writer.Flush();
    return System.Text.Encoding.UTF8.GetString(buffer.GetBuffer(), 0, (int)buffer.Length);
}
|
||||
|
||||
/// <summary>
/// Reports a failed tool call as a normal JSON-RPC result whose payload has
/// <c>isError = true</c> and the message as its only text content item, rather
/// than as a JSON-RPC error object.
/// </summary>
private static string WriteToolError(JsonElement? id, string message)
{
    var textItem = new { type = "text", text = message };
    return WriteResult(id, new
    {
        content = new[] { textItem },
        isError = true,
    });
}
|
||||
|
||||
/// <summary>
/// Writes the <c>id</c> property. Numeric ids are echoed as their raw text,
/// string ids as strings; a missing id or one of any other kind is written as null.
/// </summary>
private static void WriteId(Utf8JsonWriter w, JsonElement? id)
{
    w.WritePropertyName("id");

    var kind = id.HasValue ? id.Value.ValueKind : JsonValueKind.Null;
    if (kind == JsonValueKind.Number)
    {
        // Echo the number exactly as received (fractional, very large, ...).
        // Converting through GetInt64 would throw for such ids and the
        // response could no longer be correlated by the client.
        w.WriteRawValue(id!.Value.GetRawText(), skipInputValidation: true);
    }
    else if (kind == JsonValueKind.String)
    {
        w.WriteStringValue(id!.Value.GetString());
    }
    else
    {
        w.WriteNullValue();
    }
}
|
||||
|
||||
/// <summary>
/// Numeric codes placed in the <c>error.code</c> field of JSON-RPC error responses.
/// </summary>
private static class JsonRpcErrorCode
{
    /// <summary>The request body could not be parsed as JSON.</summary>
    public const int ParseError = -32700;

    /// <summary>An unexpected exception occurred while handling the request.</summary>
    public const int InternalError = -32603;

    /// <summary>The requested method is not one this server dispatches.</summary>
    public const int MethodNotFound = -32601;

    /// <summary>The body is not a JSON object or has no string <c>method</c>.</summary>
    public const int InvalidRequest = -32600;
}
|
||||
|
||||
/// <summary>
/// Raised by request dispatch for a method name it does not recognize; the
/// message is "Method not found: " followed by the name.
/// </summary>
private sealed class McpMethodNotFoundException : Exception
{
    public McpMethodNotFoundException(string method)
        : base($"Method not found: {method}")
    {
    }
}
|
||||
|
||||
/// <summary>
/// Signals a tool-call failure whose message is returned to the client as a
/// tool error result.
/// </summary>
private sealed class McpToolException : Exception
{
    public McpToolException(string message)
        : base(message)
    {
    }
}
|
||||
}
|
||||
@ -5,8 +5,6 @@ namespace OpenClaw.Shared;
|
||||
/// </summary>
|
||||
public static class MenuSizingHelper
|
||||
{
|
||||
private const double ScaleTolerance = 0.001;
|
||||
|
||||
public static int ConvertPixelsToViewUnits(int pixels, uint dpi)
|
||||
{
|
||||
if (pixels <= 0) return 0;
|
||||
@ -15,19 +13,6 @@ public static class MenuSizingHelper
|
||||
return Math.Max(1, (int)Math.Floor(pixels * 96.0 / dpi));
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether the DPI or the rasterization scale differs between two
/// readings. Both readings are normalized first; scales are considered equal
/// when they differ by no more than <c>ScaleTolerance</c>.
/// </summary>
public static bool HasDpiOrScaleChanged(uint previousDpi, double previousRasterizationScale, uint currentDpi, double currentRasterizationScale)
{
    if (NormalizeDpi(previousDpi) != NormalizeDpi(currentDpi))
    {
        return true;
    }

    var scaleDelta = NormalizeScale(previousRasterizationScale) - NormalizeScale(currentRasterizationScale);
    return Math.Abs(scaleDelta) > ScaleTolerance;
}
|
||||
|
||||
public static int CalculateWindowHeight(int contentHeight, int workAreaHeight, int minimumHeight = 100)
|
||||
{
|
||||
if (contentHeight < 0) contentHeight = 0;
|
||||
@ -40,9 +25,4 @@ public static class MenuSizingHelper
|
||||
var desiredHeight = Math.Max(contentHeight, minimumVisibleHeight);
|
||||
return Math.Min(desiredHeight, workAreaHeight);
|
||||
}
|
||||
|
||||
/// <summary>Maps a DPI of 0 to 96; any other value is returned unchanged.</summary>
private static uint NormalizeDpi(uint dpi)
{
    return dpi != 0 ? dpi : 96u;
}
|
||||
|
||||
/// <summary>
/// Returns <paramref name="scale"/> when it is finite and positive; otherwise 1.0.
/// </summary>
private static double NormalizeScale(double scale)
{
    if (!double.IsFinite(scale) || scale <= 0)
    {
        return 1.0;
    }

    return scale;
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,6 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
@ -54,25 +53,15 @@ public interface INodeCapability
|
||||
{
|
||||
/// <summary>The capability category (canvas, camera, screen, system)</summary>
|
||||
string Category { get; }
|
||||
|
||||
|
||||
/// <summary>Commands this capability can handle</summary>
|
||||
IReadOnlyList<string> Commands { get; }
|
||||
|
||||
|
||||
/// <summary>Check if this capability can handle the given command</summary>
|
||||
bool CanHandle(string command);
|
||||
|
||||
|
||||
/// <summary>Execute a command and return the result</summary>
|
||||
Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request);
|
||||
|
||||
/// <summary>
|
||||
/// Execute a command with a cancellation token. The default implementation
|
||||
/// just calls <see cref="ExecuteAsync(NodeInvokeRequest)"/>; capabilities
|
||||
/// with long-running work (screen.record, camera.clip) should override so
|
||||
/// MCP request cancellation propagates into the underlying capture
|
||||
/// pipeline rather than orphaning it.
|
||||
/// </summary>
|
||||
Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request, CancellationToken cancellationToken)
|
||||
=> ExecuteAsync(request);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@ -82,23 +71,20 @@ public abstract class NodeCapabilityBase : INodeCapability
|
||||
{
|
||||
public abstract string Category { get; }
|
||||
public abstract IReadOnlyList<string> Commands { get; }
|
||||
|
||||
|
||||
protected IOpenClawLogger Logger { get; }
|
||||
|
||||
|
||||
protected NodeCapabilityBase(IOpenClawLogger logger)
|
||||
{
|
||||
Logger = logger;
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// Reports whether <paramref name="command"/> is one of the entries in <see cref="Commands"/>.
/// </summary>
public virtual bool CanHandle(string command) => Commands.Contains(command);
|
||||
|
||||
|
||||
public abstract Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request);
|
||||
|
||||
public virtual Task<NodeInvokeResponse> ExecuteAsync(NodeInvokeRequest request, CancellationToken cancellationToken)
|
||||
=> ExecuteAsync(request);
|
||||
|
||||
protected NodeInvokeResponse Success(object? payload = null)
|
||||
{
|
||||
|
||||
@ -15,13 +15,6 @@
|
||||
<PackageReference Include="NSec.Cryptography" Version="25.4.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<!-- Audio / Speech-to-Text (platform-agnostic components) -->
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Whisper.net" Version="1.9.0" />
|
||||
<PackageReference Include="Whisper.net.Runtime" Version="1.9.0" />
|
||||
<PackageReference Include="Microsoft.ML.OnnxRuntime" Version="1.25.1" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,4 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
@ -16,14 +15,8 @@ public class SettingsData
|
||||
public string? SshTunnelHost { get; set; }
|
||||
public int SshTunnelRemotePort { get; set; } = 18789;
|
||||
public int SshTunnelLocalPort { get; set; } = 18789;
|
||||
public bool AutoStart { get; set; } = true;
|
||||
public bool AutoStart { get; set; }
|
||||
public bool GlobalHotkeyEnabled { get; set; } = true;
|
||||
/// <summary>
|
||||
/// One-shot gate: set to true after the post-onboarding "first-run" bootstrap
|
||||
/// kickoff message has been injected into the chat exactly once. Subsequent
|
||||
/// chat-window launches skip injection.
|
||||
/// </summary>
|
||||
public bool HasInjectedFirstRunBootstrap { get; set; }
|
||||
public bool ShowNotifications { get; set; } = true;
|
||||
public string? NotificationSound { get; set; }
|
||||
public bool NotifyHealth { get; set; } = true;
|
||||
@ -35,68 +28,13 @@ public class SettingsData
|
||||
public bool NotifyStock { get; set; } = true;
|
||||
public bool NotifyInfo { get; set; } = true;
|
||||
public bool EnableNodeMode { get; set; } = false;
|
||||
public bool NodeCanvasEnabled { get; set; } = true;
|
||||
public bool NodeScreenEnabled { get; set; } = true;
|
||||
public bool NodeCameraEnabled { get; set; } = true;
|
||||
public bool ScreenRecordingConsentGiven { get; set; } = false;
|
||||
public bool CameraRecordingConsentGiven { get; set; } = false;
|
||||
public bool NodeLocationEnabled { get; set; } = true;
|
||||
public bool NodeBrowserProxyEnabled { get; set; } = true;
|
||||
public bool NodeSttEnabled { get; set; } = false;
|
||||
/// <summary>STT language: "auto" for Whisper auto-detect, or a BCP-47 tag like "en-US".</summary>
|
||||
public string SttLanguage { get; set; } = "auto";
|
||||
/// <summary>Whisper model name: "tiny", "base", or "small".</summary>
|
||||
public string SttModelName { get; set; } = "base";
|
||||
/// <summary>Seconds of silence before auto-submit in voice chat mode.</summary>
|
||||
public float SttSilenceTimeout { get; set; } = 2.5f;
|
||||
/// <summary>Enable TTS playback of responses during voice sessions.</summary>
|
||||
public bool VoiceTtsEnabled { get; set; } = true;
|
||||
/// <summary>Play audio feedback chimes on listen start/stop.</summary>
|
||||
public bool VoiceAudioFeedback { get; set; } = true;
|
||||
public bool NodeTtsEnabled { get; set; } = false;
|
||||
public string TtsProvider { get; set; } = OpenClaw.Shared.Capabilities.TtsCapability.PiperProvider;
|
||||
/// <summary>Persisted: whether the Hub's NavigationView pane is expanded
|
||||
/// (true) or collapsed/compact (false). Default true.</summary>
|
||||
public bool HubNavPaneOpen { get; set; } = true;
|
||||
/// <summary>Optional Windows TTS voice id (or display name). Empty = system default.</summary>
|
||||
public string? TtsWindowsVoiceId { get; set; }
|
||||
/// <summary>
|
||||
/// ElevenLabs API key storage slot. When persisted by the Windows tray's
|
||||
/// SettingsManager this is an opaque dpapi:-prefixed blob, not plaintext.
|
||||
/// </summary>
|
||||
public string? TtsElevenLabsApiKey { get; set; }
|
||||
public string? TtsElevenLabsModel { get; set; }
|
||||
public string? TtsElevenLabsVoiceId { get; set; }
|
||||
/// <summary>Piper voice identifier, e.g. "en_US-amy-low". Voice file is downloaded on first use.</summary>
|
||||
public string TtsPiperVoiceId { get; set; } = "en_US-amy-low";
|
||||
/// <summary>Run the local MCP HTTP server. Independent of EnableNodeMode.</summary>
|
||||
public bool EnableMcpServer { get; set; } = false;
|
||||
/// <summary>
|
||||
/// Hostnames the A2UI image renderer is allowed to fetch over HTTPS.
|
||||
/// Empty by default — agents can still ship inline data: images. Add hosts
|
||||
/// (e.g., "cdn.example.com") via the Settings window.
|
||||
/// </summary>
|
||||
public List<string>? A2UIImageHosts { get; set; }
|
||||
/// <summary>
|
||||
/// Legacy flag (replaced by EnableMcpServer + the EnableNodeMode pair).
|
||||
/// Kept for one-time migration on Load; not written on Save.
|
||||
/// </summary>
|
||||
[JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
|
||||
public bool? McpOnlyMode { get; set; }
|
||||
public string? PreferredGatewayId { get; set; }
|
||||
public bool HasSeenActivityStreamTip { get; set; } = false;
|
||||
public string? SkippedUpdateTag { get; set; }
|
||||
public bool NotifyChatResponses { get; set; } = true;
|
||||
public bool PreferStructuredCategories { get; set; } = true;
|
||||
public List<UserNotificationRule>? UserRules { get; set; }
|
||||
|
||||
// ── (Voice / STT settings consolidated into the block above.) ──
|
||||
|
||||
private static readonly JsonSerializerOptions s_options = new()
|
||||
{
|
||||
WriteIndented = true,
|
||||
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
|
||||
};
|
||||
private static readonly JsonSerializerOptions s_options = new() { WriteIndented = true };
|
||||
|
||||
public string ToJson() => JsonSerializer.Serialize(this, s_options);
|
||||
|
||||
|
||||
@ -1,80 +0,0 @@
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Builds the argument string for the <c>ssh</c> process that forwards local ports to the
/// gateway on the remote host. All inputs are validated before they are placed into the
/// command line.
/// </summary>
public static class SshTunnelCommandLine
{
    private static readonly Regex s_validSshUser = new(@"^[a-zA-Z0-9._-]+$", RegexOptions.Compiled);
    private static readonly Regex s_validSshHost = new(@"^[a-zA-Z0-9._-]+$", RegexOptions.Compiled);

    // Highest gateway port for which "port + 2" (the browser-proxy forward) is still a valid port.
    private const int MaxBrowserProxyGatewayPort = 65533;

    // Fixed SSH options shared by every tunnel invocation.
    // Centralised here so the connection policy is visible and easy to review or adjust.
    private const string BaseOptions =
        "-o BatchMode=yes " +
        "-o ExitOnForwardFailure=yes " +
        "-o ServerAliveInterval=15 " +
        "-o ServerAliveCountMax=3 " +
        "-o TCPKeepAlive=yes " +
        "-N ";

    /// <summary>
    /// Builds the ssh arguments for a single local forward (no browser-proxy forward).
    /// </summary>
    public static string BuildArguments(string user, string host, int remotePort, int localPort)
        => BuildArguments(user, host, remotePort, localPort, includeBrowserProxyForward: false);

    /// <summary>
    /// Builds the ssh arguments: the fixed base options, one <c>-L</c> forward from
    /// <paramref name="localPort"/> to <c>127.0.0.1:<paramref name="remotePort"/></c>, optionally a
    /// second forward for both ports + 2, and finally <c>user@host</c>.
    /// </summary>
    /// <param name="user">SSH user name; surrounding whitespace is trimmed.</param>
    /// <param name="host">SSH host name; surrounding whitespace is trimmed.</param>
    /// <param name="remotePort">Port on the remote side (1-65535).</param>
    /// <param name="localPort">Port on the local side (1-65535).</param>
    /// <param name="includeBrowserProxyForward">When true, also forwards localPort + 2 to remotePort + 2.</param>
    /// <exception cref="ArgumentNullException"><paramref name="user"/> or <paramref name="host"/> is null.</exception>
    /// <exception cref="ArgumentException">User or host contains characters outside <c>[a-zA-Z0-9._-]</c>, or the user starts with '-'.</exception>
    /// <exception cref="ArgumentOutOfRangeException">A port is outside the permitted range.</exception>
    public static string BuildArguments(
        string user,
        string host,
        int remotePort,
        int localPort,
        bool includeBrowserProxyForward)
    {
        // Fail with a descriptive argument exception instead of a NullReferenceException from Trim().
        ArgumentNullException.ThrowIfNull(user);
        ArgumentNullException.ThrowIfNull(host);

        user = user.Trim();
        host = host.Trim();

        if (!s_validSshUser.IsMatch(user))
            throw new ArgumentException($"SSH user contains invalid characters: '{user}'", nameof(user));

        // The user is the first character of the "user@host" destination token, so a leading
        // '-' would make ssh treat the whole token as a command-line option.
        if (user[0] == '-')
            throw new ArgumentException($"SSH user must not start with '-': '{user}'", nameof(user));

        if (!s_validSshHost.IsMatch(host))
            throw new ArgumentException($"SSH host contains invalid characters: '{host}'", nameof(host));

        ValidatePort(remotePort, nameof(remotePort));
        ValidatePort(localPort, nameof(localPort));
        if (includeBrowserProxyForward)
        {
            ValidateBrowserProxyPort(remotePort, nameof(remotePort));
            ValidateBrowserProxyPort(localPort, nameof(localPort));
        }

        var sb = new StringBuilder(BaseOptions);
        AppendLocalForward(sb, localPort, remotePort);
        if (includeBrowserProxyForward)
            AppendLocalForward(sb, localPort + 2, remotePort + 2);
        sb.Append(user);
        sb.Append('@');
        sb.Append(host);
        return sb.ToString();
    }

    /// <summary>
    /// True when both ports are in 1-65533, i.e. "port + 2" is still a valid port on each side.
    /// </summary>
    public static bool CanForwardBrowserProxyPort(int remotePort, int localPort) =>
        remotePort is >= 1 and <= MaxBrowserProxyGatewayPort &&
        localPort is >= 1 and <= MaxBrowserProxyGatewayPort;

    // Appends "-L <local>:127.0.0.1:<remote> " (with trailing space) to the builder.
    private static void AppendLocalForward(StringBuilder sb, int localPort, int remotePort)
    {
        sb.Append("-L ");
        sb.Append(localPort);
        sb.Append(":127.0.0.1:");
        sb.Append(remotePort);
        sb.Append(' ');
    }

    // Rejects ports outside 1-65535.
    private static void ValidatePort(int port, string parameterName)
    {
        if (port is < 1 or > 65535)
            throw new ArgumentOutOfRangeException(parameterName, port, "SSH tunnel ports must be between 1 and 65535.");
    }

    // Rejects ports for which the +2 browser-proxy forward would exceed 65535.
    private static void ValidateBrowserProxyPort(int port, string parameterName)
    {
        if (port > MaxBrowserProxyGatewayPort)
            throw new ArgumentOutOfRangeException(parameterName, port, "Browser proxy SSH forwarding requires gateway ports at or below 65533.");
    }
}
|
||||
@ -1,35 +0,0 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Redacts credential-looking substrings from a message before it is logged.
/// Four passes are applied in order: <c>Authorization: Bearer</c> headers, JSON string fields
/// whose key contains token/secret/bearer/authorization, bare 64-character lowercase hex
/// strings, and bare 43-character base64url strings.
/// </summary>
public static class TokenSanitizer
{
    private static readonly Regex AuthorizationBearerPattern = new(
        @"(?i)(Authorization\s*:\s*Bearer\s+)([^\s""',;]+)",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    // The value group consumes backslash escape pairs as a unit so that an escaped quote (\")
    // inside the secret does not end the match early and leave the tail of the secret behind.
    private static readonly Regex JsonSecretFieldPattern = new(
        @"""(?<key>[^""]*(?:token|secret|bearer|authorization)[^""]*)""\s*:\s*""(?<value>(?:[^""\\]|\\[\s\S])+)""",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);

    private static readonly Regex BareGatewayHexTokenPattern = new(
        @"(?<![0-9A-Fa-f])[0-9a-f]{64}(?![0-9A-Fa-f])",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    private static readonly Regex LongBase64UrlPattern = new(
        @"(?<![A-Za-z0-9_-])[A-Za-z0-9_-]{43}(?![A-Za-z0-9_-])",
        RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Returns <paramref name="message"/> with recognised secrets replaced by
    /// <c>[REDACTED]</c> / <c>[REDACTED_TOKEN]</c>.
    /// </summary>
    /// <param name="message">Text to sanitize; may be null or empty.</param>
    /// <returns>The sanitized text; an empty string when <paramref name="message"/> is null or empty.</returns>
    public static string Sanitize(string? message)
    {
        if (string.IsNullOrEmpty(message))
            return message ?? string.Empty;

        var sanitized = AuthorizationBearerPattern.Replace(message, "$1[REDACTED]");

        // Keep the key so the log stays diagnosable; only the value is dropped.
        sanitized = JsonSecretFieldPattern.Replace(
            sanitized,
            match => $"\"{match.Groups["key"].Value}\":\"[REDACTED]\"");

        sanitized = BareGatewayHexTokenPattern.Replace(sanitized, "[REDACTED_TOKEN]");
        return LongBase64UrlPattern.Replace(sanitized, "[REDACTED_TOKEN]");
    }
}
|
||||
@ -1,33 +0,0 @@
|
||||
using System;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// Reduce a URL to the parts that are safe to write to disk-backed logs.
/// Query strings routinely carry tokens, codes, signatures, email addresses,
/// and PII; the log-rotation policy on a developer machine is "never", so
/// anything we put in the log file effectively lives forever.
///
/// The shape is "scheme://host[:port]/&lt;first-segment&gt;/…" — enough to triage,
/// not enough to replay an OAuth callback or recover a credential. Any
/// "user:password@" user-info component is removed as well. URLs that
/// fail to parse are returned as the literal "&lt;unparseable URL&gt;" rather
/// than echoed back, so a deliberately malformed string can't slip through.
/// </summary>
public static class UrlLogSanitizer
{
    /// <summary>
    /// Returns a log-safe rendering of <paramref name="url"/>.
    /// </summary>
    /// <param name="url">The URL to sanitize; may be null or empty.</param>
    /// <returns>
    /// "&lt;empty&gt;" for null/empty input, "&lt;unparseable URL&gt;" when the input is not an
    /// absolute URI, otherwise the origin (without user-info) plus at most the first path segment.
    /// </returns>
    public static string Sanitize(string? url)
    {
        if (string.IsNullOrEmpty(url)) return "<empty>";
        if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) return "<unparseable URL>";

        // GetLeftPart(Authority) keeps "user:password@", which must never reach the log.
        var origin = StripUserInfo(uri.GetLeftPart(UriPartial.Authority));
        var path = uri.AbsolutePath;
        if (string.IsNullOrEmpty(path) || path == "/") return origin + "/";

        // Keep only the first segment so a /reset-password/<token> style path
        // doesn't leak the bearer-equivalent secret in the segment itself.
        var firstSlash = path.IndexOf('/', 1);
        var firstSegment = firstSlash < 0 ? path : path.Substring(0, firstSlash);
        return origin + firstSegment + (firstSlash < 0 ? string.Empty : "/…");
    }

    // Removes everything between "://" and the last '@' of a "scheme://[userinfo@]host[:port]"
    // string. Input without "://" or without '@' after it is returned unchanged.
    private static string StripUserInfo(string origin)
    {
        var schemeSeparator = origin.IndexOf("://", StringComparison.Ordinal);
        var at = origin.LastIndexOf('@');
        if (schemeSeparator < 0 || at < schemeSeparator) return origin;

        return origin.Substring(0, schemeSeparator + 3) + origin.Substring(at + 1);
    }
}
|
||||
@ -1,130 +0,0 @@
|
||||
using System;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace OpenClaw.Shared;
|
||||
|
||||
/// <summary>
/// A JSON message exchanged over the WebView2 native↔SPA bridge.
///
/// Wire format: <c>{ "type": "&lt;string&gt;", "payload": { ... } }</c>
///
/// Native → SPA: <c>CoreWebView2.PostWebMessageAsJson(msg.ToJson())</c>
/// SPA → Native: <c>CoreWebView2.WebMessageReceived</c> → <c>WebBridgeMessage.TryParse(e.WebMessageAsJson)</c>
/// </summary>
public sealed record WebBridgeMessage
{
    /// <summary>
    /// Creates a message. <paramref name="type"/> is trimmed; <paramref name="payloadJson"/>
    /// is parsed to verify it is valid JSON (null/blank becomes a null payload).
    /// </summary>
    /// <exception cref="ArgumentException">
    /// <paramref name="type"/> is null/blank, or <paramref name="payloadJson"/> is not valid JSON.
    /// </exception>
    public WebBridgeMessage(string type, string? payloadJson = null)
    {
        if (string.IsNullOrWhiteSpace(type))
            throw new ArgumentException("Bridge message type is required.", nameof(type));

        Type = type.Trim();
        PayloadJson = NormalizePayloadJson(payloadJson);
    }

    /// <summary>The message type discriminator (never blank when set by the constructor).</summary>
    public string Type { get; init; }

    /// <summary>Raw JSON text of the payload, or null when the message carries none.</summary>
    public string? PayloadJson { get; init; }

    // ── well-known type constants ──────────────────────────────────────────

    /// <summary>Sent native→SPA when a screen-recording session starts.</summary>
    public const string TypeRecordingStart = "recording-start";

    /// <summary>Sent native→SPA when a screen-recording session ends.</summary>
    public const string TypeRecordingStop = "recording-stop";

    /// <summary>Sent native→SPA when voice listening becomes active.</summary>
    public const string TypeVoiceStart = "voice-start";

    /// <summary>Sent native→SPA when voice listening becomes inactive.</summary>
    public const string TypeVoiceStop = "voice-stop";

    /// <summary>Sent native→SPA to push draft text into the chat input.</summary>
    public const string TypeDraftText = "draft-text";

    /// <summary>Sent SPA→native when the SPA is fully initialised and ready for messages.</summary>
    public const string TypeReady = "ready";

    // ── parsing ────────────────────────────────────────────────────────────

    /// <summary>
    /// Tries to parse a <see cref="WebBridgeMessage"/> from a JSON string.
    /// Returns <see langword="null"/> if the JSON is malformed, is not a JSON object,
    /// or is missing the required string "type" field.
    /// </summary>
    public static WebBridgeMessage? TryParse(string? json)
    {
        if (string.IsNullOrWhiteSpace(json))
            return null;

        try
        {
            using var doc = JsonDocument.Parse(json);
            var root = doc.RootElement;

            // TryGetProperty throws InvalidOperationException on arrays, strings, numbers, etc.;
            // a Try* method must report those as "not a bridge message" instead.
            if (root.ValueKind != JsonValueKind.Object)
                return null;

            if (!root.TryGetProperty("type", out var typeEl) || typeEl.ValueKind != JsonValueKind.String)
                return null;

            var type = typeEl.GetString();
            if (string.IsNullOrWhiteSpace(type))
                return null;

            string? payloadJson = null;
            if (root.TryGetProperty("payload", out var payloadEl)
                && payloadEl.ValueKind != JsonValueKind.Null
                && payloadEl.ValueKind != JsonValueKind.Undefined)
            {
                payloadJson = payloadEl.GetRawText();
            }

            return new WebBridgeMessage(type, payloadJson);
        }
        catch (JsonException)
        {
            return null;
        }
        catch (ArgumentException)
        {
            return null;
        }
    }

    // ── serialisation ──────────────────────────────────────────────────────

    /// <summary>
    /// Serialises the message to JSON, suitable for passing to
    /// <c>CoreWebView2.PostWebMessageAsJson</c>.
    /// If <paramref name="payload"/> is supplied it overrides <see cref="PayloadJson"/>;
    /// when neither is present an empty object is emitted as the payload.
    /// </summary>
    public string ToJson(object? payload = null)
    {
        var typeEncoded = JsonSerializer.Serialize(Type);

        if (payload != null)
        {
            var payloadEncoded = JsonSerializer.Serialize(payload);
            return $"{{\"type\":{typeEncoded},\"payload\":{payloadEncoded}}}";
        }

        if (!string.IsNullOrEmpty(PayloadJson))
            return $"{{\"type\":{typeEncoded},\"payload\":{PayloadJson}}}";

        return $"{{\"type\":{typeEncoded},\"payload\":{{}}}}";
    }

    // Validates that payloadJson parses as JSON and returns its raw text; null/blank maps to null.
    private static string? NormalizePayloadJson(string? payloadJson)
    {
        if (string.IsNullOrWhiteSpace(payloadJson))
            return null;

        try
        {
            using var doc = JsonDocument.Parse(payloadJson);
            return doc.RootElement.GetRawText();
        }
        catch (JsonException ex)
        {
            throw new ArgumentException("PayloadJson must be a valid JSON value.", nameof(payloadJson), ex);
        }
    }
}
|
||||
@ -251,10 +251,6 @@ public abstract class WebSocketClientBase : IDisposable
|
||||
while (!_disposed && !_cts.Token.IsCancellationRequested && ShouldAutoReconnect())
|
||||
{
|
||||
var delay = BackoffMs[Math.Min(_reconnectAttempts, BackoffMs.Length - 1)];
|
||||
// Add 0-25% jitter to prevent thundering herd when multiple clients
|
||||
// (operator + node) reconnect on the same schedule
|
||||
var jitter = Random.Shared.Next(0, delay / 4);
|
||||
delay += jitter;
|
||||
_reconnectAttempts++;
|
||||
_logger.Warn($"{ClientRole} reconnecting in {delay}ms (attempt {_reconnectAttempts})");
|
||||
RaiseStatusChanged(ConnectionStatus.Connecting);
|
||||
|
||||
@ -30,14 +30,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
private bool _isPaired;
|
||||
// Bridges the gap between an approval event and the next hello-ok when the gateway omits auth.deviceToken.
|
||||
private bool _pairingApprovedAwaitingReconnect;
|
||||
// Persists across disconnect/error so ShouldAutoReconnect can block reconnect
|
||||
// even after OnDisconnected clears _isPendingApproval.
|
||||
private volatile bool _pairingBlocked;
|
||||
private volatile bool _rateLimited;
|
||||
// Bug 3: source-side idempotency for PairingStatusChanged. HandleHelloOk runs on every
|
||||
// WS reconnect and re-fires PairingStatus.Paired even when nothing changed, causing a
|
||||
// toast storm in the tray UI. Track the last emitted status and only fire on transitions.
|
||||
private PairingStatus? _lastEmittedPairingStatus;
|
||||
private readonly string _gatewayToken;
|
||||
private readonly string? _bootstrapToken;
|
||||
|
||||
@ -54,7 +46,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
public event EventHandler<NodeInvokeCompletedEventArgs>? InvokeCompleted;
|
||||
public event EventHandler<PairingStatusEventArgs>? PairingStatusChanged;
|
||||
public event EventHandler<JsonElement>? HealthReceived;
|
||||
public event EventHandler<GatewaySelfInfo>? GatewaySelfUpdated;
|
||||
|
||||
public new bool IsConnected => _isConnected;
|
||||
public string? NodeId => _nodeId;
|
||||
@ -65,7 +56,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
public bool IsPendingApproval => _isPendingApproval;
|
||||
|
||||
/// <summary>True if device is paired via a stored token or an explicit gateway approval event.</summary>
|
||||
public bool IsPaired => _isPaired || !string.IsNullOrEmpty(_deviceIdentity.NodeDeviceToken);
|
||||
public bool IsPaired => _isPaired || !string.IsNullOrEmpty(_deviceIdentity.DeviceToken);
|
||||
|
||||
/// <summary>Device ID for display/approval (first 16 chars of full ID)</summary>
|
||||
public string ShortDeviceId => _deviceIdentity.DeviceId.Length > 16
|
||||
@ -74,15 +65,12 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
/// <summary>Full device ID for approval command</summary>
|
||||
public string FullDeviceId => _deviceIdentity.DeviceId;
|
||||
|
||||
/// <summary>Human-readable display name surfaced to the gateway and other nodes.</summary>
|
||||
public string DisplayName => _registration.DisplayName;
|
||||
|
||||
|
||||
protected override int ReceiveBufferSize => 65536;
|
||||
protected override string ClientRole => "node";
|
||||
|
||||
public WindowsNodeClient(string gatewayUrl, string token, string dataPath, IOpenClawLogger? logger = null, string? bootstrapToken = null)
|
||||
: base(gatewayUrl, ResolveRequiredCredential(token, bootstrapToken, dataPath, logger), logger)
|
||||
: base(gatewayUrl, ResolveRequiredCredential(token, bootstrapToken), logger)
|
||||
{
|
||||
_gatewayToken = NormalizeOptionalCredential(token);
|
||||
_bootstrapToken = NormalizeOptionalCredential(bootstrapToken);
|
||||
@ -106,14 +94,8 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
return string.IsNullOrWhiteSpace(credential) ? string.Empty : credential;
|
||||
}
|
||||
|
||||
private static string ResolveRequiredCredential(string? token, string? bootstrapToken, string dataPath, IOpenClawLogger? logger)
|
||||
private static string ResolveRequiredCredential(string? token, string? bootstrapToken)
|
||||
{
|
||||
var storedNodeToken = TryLoadStoredNodeToken(dataPath, logger);
|
||||
if (!string.IsNullOrEmpty(storedNodeToken))
|
||||
{
|
||||
return storedNodeToken;
|
||||
}
|
||||
|
||||
var gatewayToken = NormalizeOptionalCredential(token);
|
||||
if (!string.IsNullOrEmpty(gatewayToken))
|
||||
{
|
||||
@ -128,25 +110,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
throw new ArgumentException("Token or bootstrap token is required.", nameof(token));
|
||||
}
|
||||
|
||||
/// <summary>
/// Reports whether a non-blank node device token can be loaded from the identity stored
/// under <paramref name="dataPath"/>.
/// </summary>
public static bool HasStoredNodeDeviceToken(string dataPath, IOpenClawLogger? logger = null)
{
    var storedToken = TryLoadStoredNodeToken(dataPath, logger);
    var isMissing = string.IsNullOrWhiteSpace(storedToken);
    return !isMissing;
}
|
||||
|
||||
/// <summary>
/// Best-effort read of the node device token from the identity stored under
/// <paramref name="dataPath"/>. Never throws: any failure is logged as a warning
/// (when a logger is supplied) and reported as "no token".
/// </summary>
/// <returns>The stored token, or null when it is absent, blank, or could not be loaded.</returns>
private static string? TryLoadStoredNodeToken(string dataPath, IOpenClawLogger? logger)
{
    try
    {
        var identity = new DeviceIdentity(dataPath, logger);
        identity.Initialize();

        var token = identity.NodeDeviceToken;
        return string.IsNullOrWhiteSpace(token) ? null : token;
    }
    catch (Exception ex)
    {
        // Still best-effort, but leave a trace so a broken identity store can be diagnosed
        // instead of silently falling back to the gateway/bootstrap credential.
        logger?.Warn($"Failed to load stored node device token: {ex.GetType().Name}: {ex.Message}");
        return null;
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// Register a capability handler
|
||||
@ -213,7 +176,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
try
|
||||
{
|
||||
// Log raw messages at debug level (visible in dbgview, not in log file noise)
|
||||
_logger.Debug($"[NODE RX] {TokenSanitizer.Sanitize(json)}");
|
||||
_logger.Debug($"[NODE RX] {json}");
|
||||
|
||||
using var doc = JsonDocument.Parse(json);
|
||||
var root = doc.RootElement;
|
||||
@ -281,10 +244,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
break;
|
||||
case "health":
|
||||
if (root.TryGetProperty("payload", out var payload))
|
||||
{
|
||||
PublishGatewaySelf(GatewaySelfInfo.FromHealthPayload(payload));
|
||||
HealthReceived?.Invoke(this, payload.Clone());
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -304,12 +264,11 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
_isPendingApproval = true;
|
||||
_isPaired = false;
|
||||
_pairingBlocked = true;
|
||||
_pairingApprovedAwaitingReconnect = false;
|
||||
|
||||
_logger.Info($"[NODE] Pairing requested for this device via {eventType}");
|
||||
_logger.Info($"To approve, run: openclaw devices approve {_deviceIdentity.DeviceId}");
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Pending,
|
||||
_deviceIdentity.DeviceId,
|
||||
$"Run: openclaw devices approve {ShortDeviceId}..."));
|
||||
@ -338,10 +297,9 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
{
|
||||
_isPendingApproval = false;
|
||||
_isPaired = true;
|
||||
_pairingBlocked = false; // Allow reconnect after approval
|
||||
_pairingApprovedAwaitingReconnect = true;
|
||||
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Paired,
|
||||
_deviceIdentity.DeviceId,
|
||||
"Pairing approved; reconnecting to refresh node state."));
|
||||
@ -357,7 +315,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
_isPaired = false;
|
||||
_pairingApprovedAwaitingReconnect = false;
|
||||
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Rejected,
|
||||
_deviceIdentity.DeviceId,
|
||||
null));
|
||||
@ -528,7 +486,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
private async Task SendNodeConnectAsync(string? nonce, long ts)
|
||||
{
|
||||
var isPaired = !string.IsNullOrEmpty(_deviceIdentity.NodeDeviceToken);
|
||||
var isPaired = !string.IsNullOrEmpty(_deviceIdentity.DeviceToken);
|
||||
var usingBootstrap = !isPaired && !string.IsNullOrEmpty(_bootstrapToken);
|
||||
|
||||
_logger.Info($"Connecting with Ed25519 device identity (paired: {isPaired}, bootstrap: {usingBootstrap})");
|
||||
@ -598,9 +556,9 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
private (Dictionary<string, string> Auth, string TokenForSignature) BuildConnectAuth()
|
||||
{
|
||||
if (!string.IsNullOrEmpty(_deviceIdentity.NodeDeviceToken))
|
||||
if (!string.IsNullOrEmpty(_deviceIdentity.DeviceToken))
|
||||
{
|
||||
return (new Dictionary<string, string> { ["deviceToken"] = _deviceIdentity.NodeDeviceToken }, _deviceIdentity.NodeDeviceToken);
|
||||
return (new Dictionary<string, string> { ["token"] = _deviceIdentity.DeviceToken }, _deviceIdentity.DeviceToken);
|
||||
}
|
||||
|
||||
if (!string.IsNullOrEmpty(_bootstrapToken))
|
||||
@ -629,10 +587,8 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
// Handle hello-ok (successful registration)
|
||||
if (payload.TryGetProperty("type", out var t) && t.GetString() == "hello-ok")
|
||||
{
|
||||
PublishGatewaySelf(GatewaySelfInfo.FromHelloOk(payload));
|
||||
var reconnectingAfterApproval = _pairingApprovedAwaitingReconnect;
|
||||
_isConnected = true;
|
||||
_rateLimited = false; // Clear transient rate-limit on successful connect
|
||||
ResetReconnectAttempts();
|
||||
|
||||
// Extract node ID if returned
|
||||
@ -657,8 +613,8 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
_isPaired = true;
|
||||
_pairingApprovedAwaitingReconnect = false;
|
||||
_logger.Info("Received device token - we are now paired!");
|
||||
_deviceIdentity.StoreDeviceTokenForRole("node", deviceToken, TryGetAuthScopes(authPayload));
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
_deviceIdentity.StoreDeviceToken(deviceToken);
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Paired,
|
||||
_deviceIdentity.DeviceId,
|
||||
wasWaiting ? "Pairing approved!" : null));
|
||||
@ -671,7 +627,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
// Skip this block if we already fired PairingStatusChanged above via gotNewToken.
|
||||
if (!gotNewToken)
|
||||
{
|
||||
if (string.IsNullOrEmpty(_deviceIdentity.NodeDeviceToken))
|
||||
if (string.IsNullOrEmpty(_deviceIdentity.DeviceToken))
|
||||
{
|
||||
if (reconnectingAfterApproval)
|
||||
{
|
||||
@ -684,10 +640,9 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
{
|
||||
_isPendingApproval = true;
|
||||
_isPaired = false;
|
||||
_pairingBlocked = true;
|
||||
_logger.Info("Not yet paired - check 'openclaw devices list' for pending approval");
|
||||
_logger.Info($"To approve, run: openclaw devices approve {_deviceIdentity.DeviceId}");
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Pending,
|
||||
_deviceIdentity.DeviceId,
|
||||
$"Run: openclaw devices approve {ShortDeviceId}..."));
|
||||
@ -699,7 +654,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
_isPaired = true;
|
||||
_pairingApprovedAwaitingReconnect = false;
|
||||
_logger.Info("Already paired with stored device token");
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Paired,
|
||||
_deviceIdentity.DeviceId));
|
||||
}
|
||||
@ -709,22 +664,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Raises <see cref="PairingStatusChanged"/> only when <paramref name="args"/> carries a status
/// different from the one most recently emitted (or when none has been emitted yet).
/// A repeat of the last status is logged and dropped.
/// </summary>
private void EmitPairingStatusOnTransition(PairingStatusEventArgs args)
{
    var isTransition = _lastEmittedPairingStatus != args.Status;
    if (isTransition)
    {
        // Record the new status before notifying subscribers.
        _lastEmittedPairingStatus = args.Status;
        PairingStatusChanged?.Invoke(this, args);
        return;
    }

    _logger.Info($"[NODE] Suppressing duplicate pairing status event: {args.Status} for {args.DeviceId}");
}
|
||||
|
||||
private void HandleRequestError(JsonElement root)
|
||||
{
|
||||
var error = "Unknown error";
|
||||
@ -764,7 +703,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
_isPendingApproval = true;
|
||||
_isPaired = false;
|
||||
_pairingBlocked = true;
|
||||
_pairingApprovedAwaitingReconnect = false;
|
||||
|
||||
var detail = !string.IsNullOrWhiteSpace(pairingRequestId)
|
||||
@ -772,26 +710,14 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
: $"Run: openclaw devices approve {ShortDeviceId}...";
|
||||
_logger.Info($"[NODE] Pairing required for this device; reason={pairingReason ?? "unknown"}, requestId={pairingRequestId ?? "none"}");
|
||||
_logger.Info($"To approve, run: openclaw devices approve {_deviceIdentity.DeviceId}");
|
||||
EmitPairingStatusOnTransition(new PairingStatusEventArgs(
|
||||
PairingStatusChanged?.Invoke(this, new PairingStatusEventArgs(
|
||||
PairingStatus.Pending,
|
||||
_deviceIdentity.DeviceId,
|
||||
detail));
|
||||
return;
|
||||
}
|
||||
|
||||
// Rate-limit / terminal auth errors — stop reconnecting
|
||||
if (error.Contains("too many failed", StringComparison.OrdinalIgnoreCase) ||
|
||||
error.Contains("rate limit", StringComparison.OrdinalIgnoreCase) ||
|
||||
error.Contains("origin not allowed", StringComparison.OrdinalIgnoreCase) ||
|
||||
error.Contains("token mismatch", StringComparison.OrdinalIgnoreCase))
|
||||
{
|
||||
_rateLimited = true;
|
||||
_logger.Warn($"[NODE] Terminal auth error; stopping reconnect. Error: {TokenSanitizer.Sanitize(error)}");
|
||||
RaiseStatusChanged(ConnectionStatus.Error);
|
||||
return;
|
||||
}
|
||||
|
||||
_logger.Error($"Node registration failed: {TokenSanitizer.Sanitize(error)} (code: {errorCode})");
|
||||
_logger.Error($"Node registration failed: {error} (code: {errorCode})");
|
||||
RaiseStatusChanged(ConnectionStatus.Error);
|
||||
}
|
||||
|
||||
@ -839,27 +765,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
value = prop.GetString();
|
||||
return !string.IsNullOrWhiteSpace(value);
|
||||
}
|
||||
|
||||
/// <summary>
/// Reads the "scopes" array from <paramref name="authPayload"/> and returns its non-blank
/// string entries, de-duplicated with ordinal comparison and kept in first-seen order.
/// </summary>
/// <returns>
/// The scope strings, or null when "scopes" is absent, is not an array, or yields no usable entry.
/// </returns>
private static string[]? TryGetAuthScopes(JsonElement authPayload)
{
    var hasScopes = authPayload.TryGetProperty("scopes", out var scopes);
    if (!hasScopes || scopes.ValueKind != JsonValueKind.Array)
    {
        return null;
    }

    var distinctScopes = scopes.EnumerateArray()
        .Where(entry => entry.ValueKind == JsonValueKind.String)
        .Select(entry => entry.GetString())
        .Where(text => !string.IsNullOrWhiteSpace(text))
        .Select(text => text!)
        .Distinct(StringComparer.Ordinal)
        .ToArray();

    return distinctScopes.Length == 0 ? null : distinctScopes;
}
|
||||
|
||||
private async Task HandleRequestAsync(JsonElement root)
|
||||
{
|
||||
@ -1010,40 +915,6 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
await SendRawAsync(JsonSerializer.Serialize(msg));
|
||||
}
|
||||
|
||||
/// <summary>
/// Wraps <paramref name="payload"/> in a <c>node.event</c> request frame and
/// sends it over the socket.
/// </summary>
/// <param name="eventName">Event name placed in <c>params.event</c>; must be non-empty.</param>
/// <param name="payload">Event body; sent as a JSON string in <c>params.payloadJSON</c>.</param>
/// <returns>
/// <c>true</c> once the frame was handed to the transport; <c>false</c> when
/// the client is not connected or the send throws (the failure is logged).
/// </returns>
/// <exception cref="ArgumentException"><paramref name="eventName"/> is null or empty.</exception>
/// <exception cref="ArgumentNullException"><paramref name="payload"/> is null.</exception>
public async Task<bool> SendNodeEventAsync(string eventName, System.Text.Json.Nodes.JsonObject payload)
{
    if (string.IsNullOrEmpty(eventName))
    {
        throw new ArgumentException("eventName is required", nameof(eventName));
    }

    if (payload is null)
    {
        throw new ArgumentNullException(nameof(payload));
    }

    if (!_isConnected)
    {
        return false;
    }

    // Each request gets a fresh GUID as its id.
    var request = new
    {
        type = "req",
        id = Guid.NewGuid().ToString(),
        method = "node.event",
        @params = new
        {
            @event = eventName,
            payloadJSON = payload.ToJsonString(),
        },
    };

    bool delivered;
    try
    {
        var frame = JsonSerializer.Serialize(request, s_ignoreNullOptions);
        await SendRawAsync(frame);
        delivered = true;
    }
    catch (Exception ex)
    {
        // Best effort: report the failure through the return value instead of throwing.
        _logger.Warn($"node.event '{eventName}' send failed: {ex.Message}");
        delivered = false;
    }

    return delivered;
}
|
||||
|
||||
private async Task SendPongAsync(string? requestId)
|
||||
{
|
||||
if (requestId == null) return;
|
||||
@ -1058,29 +929,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
|
||||
await SendRawAsync(JsonSerializer.Serialize(msg));
|
||||
}
|
||||
|
||||
/// <summary>
/// Raises <c>GatewaySelfUpdated</c> with <paramref name="info"/>, but only
/// when the info reports that it carries any details.
/// </summary>
private void PublishGatewaySelf(GatewaySelfInfo info)
{
    if (info.HasAnyDetails)
    {
        GatewaySelfUpdated?.Invoke(this, info);
    }
}
|
||||
|
||||
/// <summary>
/// Tells the base client whether it may reconnect automatically.
/// </summary>
/// <returns><c>false</c> while pairing is blocked or after a rate-limit/terminal auth error; otherwise <c>true</c>.</returns>
protected override bool ShouldAutoReconnect()
{
    // Pairing: every reconnect while approval is pending creates a fresh
    // pairing request on the gateway, which snowballs into a storm.
    // _pairingBlocked is used (not _isPendingApproval) because it survives
    // OnDisconnected, which clears _isPendingApproval.
    //
    // Rate limit: _rateLimited marks the connection as terminally rejected.
    var reconnectSuppressed = _pairingBlocked || _rateLimited;
    return !reconnectSuppressed;
}
|
||||
|
||||
protected override void OnDisconnected()
|
||||
{
|
||||
_isConnected = false;
|
||||
|
||||
@ -1,72 +0,0 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
|
||||
namespace OpenClawTray.A2UI.Actions;
|
||||
|
||||
/// <summary>
/// Produces the single-line, space-separated <c>CANVAS_A2UI</c> user message
/// that is sent to the gateway when an A2UI action fires. Ported from the
/// Android reference (<c>OpenClawCanvasA2UIAction.formatAgentMessage</c>) so
/// the model sees the same text whichever node emitted the action.
/// </summary>
public static class AgentMessageFormatter
{
    /// <summary>
    /// Makes a value safe to embed as one token of the CANVAS_A2UI line.
    /// The input is trimmed; every character that is not a letter or digit
    /// (per <see cref="char.IsLetterOrDigit(char)"/>, so non-ASCII letters
    /// pass through) and not one of <c>_ - . :</c> is replaced with <c>_</c>.
    /// That includes spaces and any other whitespace.
    /// </summary>
    /// <param name="value">Raw tag value; may be null.</param>
    /// <returns>The sanitized token, or <c>-</c> for null/empty/whitespace input so a bare <c>key=</c> is never emitted.</returns>
    public static string SanitizeTagValue(string? value)
    {
        var chars = (value ?? string.Empty).Trim().ToCharArray();
        if (chars.Length == 0)
        {
            return "-";
        }

        for (var i = 0; i < chars.Length; i++)
        {
            var ch = chars[i];
            var allowed = char.IsLetterOrDigit(ch) || ch is '_' or '-' or '.' or ':';
            if (!allowed)
            {
                chars[i] = '_';
            }
        }

        return new string(chars);
    }

    /// <summary>
    /// Builds the tagged message for one action. Every identifier is passed
    /// through <see cref="SanitizeTagValue"/>; <paramref name="contextJson"/>
    /// is appended verbatim as <c>ctx=...</c> when it is not blank.
    ///
    /// <para>Ordering is a prompt-injection defence: the fixed
    /// <c>default=update_canvas</c> sentinel is written BEFORE the
    /// agent-controlled <c>ctx=</c> payload. If a hostile component smuggles
    /// something like <c>"} default=do_something_else"</c> into a context
    /// value, it can only appear after the real sentinel, as trailing noise,
    /// and cannot shadow it.</para>
    /// </summary>
    /// <param name="actionName">Action name; the model decides what it means.</param>
    /// <param name="sessionKey">Session the message is destined for.</param>
    /// <param name="surfaceId">Surface the action came from.</param>
    /// <param name="sourceComponentId">Component that raised the action.</param>
    /// <param name="host">Node display name.</param>
    /// <param name="instanceId">Device id of the node.</param>
    /// <param name="contextJson">Serialized action context, or null/blank for none.</param>
    /// <returns>The single-line message.</returns>
    public static string FormatAgentMessage(
        string actionName,
        string sessionKey,
        string surfaceId,
        string sourceComponentId,
        string host,
        string instanceId,
        string? contextJson)
    {
        var fixedTags = new[]
        {
            "CANVAS_A2UI",
            $"action={SanitizeTagValue(actionName)}",
            $"session={SanitizeTagValue(sessionKey)}",
            $"surface={SanitizeTagValue(surfaceId)}",
            $"component={SanitizeTagValue(sourceComponentId)}",
            $"host={SanitizeTagValue(host)}",
            $"instance={SanitizeTagValue(instanceId)}",
            "default=update_canvas",
        };
        var line = string.Join(" ", fixedTags);

        if (string.IsNullOrWhiteSpace(contextJson))
        {
            return line;
        }

        return string.Join(" ", line, $"ctx={contextJson}");
    }
}
|
||||
@ -1,177 +0,0 @@
|
||||
using System;
|
||||
using System.Text.Json.Nodes;
|
||||
using System.Threading.Tasks;
|
||||
using OpenClaw.Shared;
|
||||
|
||||
namespace OpenClawTray.A2UI.Actions;
|
||||
|
||||
/// <summary>
/// Identity and session information the gateway transport needs when it
/// formats a CANVAS_A2UI message. The transport reads these values each time
/// it builds a payload, so an implementation may return a different
/// <see cref="SessionKey"/> over the lifetime of the node.
/// </summary>
public interface IGatewayActionContext
{
    /// <summary>
    /// Logical session the action is appended to. A blank value is treated
    /// as <c>"main"</c> by the payload builder.
    /// </summary>
    string SessionKey { get; }

    /// <summary>
    /// Display name of this node (e.g. "Windows Node (DESKTOP-123)");
    /// rendered into the message as <c>host=</c>.
    /// </summary>
    string Host { get; }

    /// <summary>
    /// Stable per-device id, lowercased; rendered into the message as
    /// <c>instance=</c>.
    /// </summary>
    string InstanceId { get; }
}
|
||||
|
||||
/// <summary>
/// Outcome of one transport attempt, raised so the renderer can clear a
/// spinner or show an error. Plays the role of the Android
/// <c>jsDispatchA2UIActionStatus</c> path, but stays in-process.
/// </summary>
public sealed class A2UIActionStatusEventArgs : EventArgs
{
    /// <summary>Id of the action the attempt was made for.</summary>
    public required string ActionId { get; init; }

    /// <summary><c>true</c> when the action was sent; <c>false</c> on failure.</summary>
    public required bool Ok { get; init; }

    /// <summary>Short failure reason; null on success.</summary>
    public string? Error { get; init; }
}
|
||||
|
||||
/// <summary>
/// Delivers A2UI actions to the gateway. Each action is rendered as a tagged
/// user message and sent as an <c>agent.request</c> node event (the same path
/// Android uses, see <c>NodeRuntime.handleCanvasA2UIActionFromWebView</c>).
/// The gateway appends the message as a user turn in the named session and
/// runs an agent step; there is no server-side action→tool registry, so the
/// model decides what the action means.
/// </summary>
public sealed class GatewayActionTransport : IA2UIActionTransport
{
    private readonly Func<WindowsNodeClient?> _clientProvider;
    private readonly IGatewayActionContext _context;
    private readonly IOpenClawLogger _logger;

    /// <summary>Fires once per delivery attempt, whether it succeeded or failed.</summary>
    public event EventHandler<A2UIActionStatusEventArgs>? ActionStatus;

    /// <summary>Creates the transport.</summary>
    /// <param name="clientProvider">Returns the current node client, or null when there is none.</param>
    /// <param name="context">Session/identity values rendered into each message.</param>
    /// <param name="logger">Receives a warning when an <see cref="ActionStatus"/> listener throws.</param>
    public GatewayActionTransport(
        Func<WindowsNodeClient?> clientProvider,
        IGatewayActionContext context,
        IOpenClawLogger logger)
    {
        _clientProvider = clientProvider;
        _context = context;
        _logger = logger;
    }

    /// <summary><c>true</c> when the provider currently yields a connected client.</summary>
    public bool IsAvailable => _clientProvider() is { IsConnected: true };

    /// <summary>
    /// Sends <paramref name="action"/> through the current client and reports
    /// the outcome via <see cref="ActionStatus"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">No connected client, or the send reported failure.</exception>
    public async Task DeliverAsync(Protocol.A2UIAction action)
    {
        // Resolve the client exactly once. Between the caller's IsAvailable
        // check and now the client may have been torn down or recreated, and
        // a second provider call could hand back a different instance.
        var client = _clientProvider();
        if (client is not { IsConnected: true })
        {
            RaiseStatus(action.Id, ok: false, error: "gateway not connected");
            throw new InvalidOperationException("Gateway not connected");
        }

        var payload = BuildAgentRequestPayload(action, _context);
        var delivered = await client.SendNodeEventAsync("agent.request", payload).ConfigureAwait(false);
        if (delivered)
        {
            RaiseStatus(action.Id, ok: true, error: null);
            return;
        }

        RaiseStatus(action.Id, ok: false, error: "send failed");
        throw new InvalidOperationException("Gateway send failed");
    }

    /// <summary>
    /// Builds the <c>agent.request</c> payload carried inside the
    /// <c>node.event</c>. Static and side-effect free so tests can assert the
    /// wire contract without a live node client.
    /// </summary>
    /// <param name="action">Action being delivered.</param>
    /// <param name="context">Source of session key, host and instance id.</param>
    /// <returns>Object with <c>message</c>, <c>sessionKey</c>, <c>thinking</c>, <c>deliver</c> and <c>key</c>.</returns>
    public static JsonObject BuildAgentRequestPayload(Protocol.A2UIAction action, IGatewayActionContext context)
    {
        // The top-level sessionKey is what the gateway routes on, so it gets
        // the same sanitizing as the value rendered into the tag line: an
        // unsanitized key could carry path separators, control characters or
        // whitespace the gateway never expected.
        var configuredKey = string.IsNullOrWhiteSpace(context.SessionKey) ? "main" : context.SessionKey;
        var routedKey = AgentMessageFormatter.SanitizeTagValue(configuredKey);
        if (routedKey == "-")
        {
            routedKey = "main";
        }

        var text = AgentMessageFormatter.FormatAgentMessage(
            actionName: action.Name,
            sessionKey: routedKey,
            surfaceId: action.SurfaceId,
            sourceComponentId: action.SourceComponentId ?? string.Empty,
            host: context.Host,
            instanceId: context.InstanceId,
            contextJson: action.Context?.ToJsonString());

        // deliver=false: the raw CANVAS_A2UI line stays out of the visible
        // chat and only the model's reply is shown. thinking=low mirrors the
        // Android budget hint for a quick agentic step.
        var payload = new JsonObject();
        payload["message"] = text;
        payload["sessionKey"] = routedKey;
        payload["thinking"] = "low";
        payload["deliver"] = false;
        payload["key"] = action.Id;
        return payload;
    }

    /// <summary>Raises <see cref="ActionStatus"/>; a throwing listener is logged, never propagated.</summary>
    private void RaiseStatus(string actionId, bool ok, string? error)
    {
        var listeners = ActionStatus;
        if (listeners is null)
        {
            return;
        }

        var args = new A2UIActionStatusEventArgs
        {
            ActionId = actionId,
            Ok = ok,
            Error = error,
        };

        try
        {
            listeners.Invoke(this, args);
        }
        catch (Exception ex)
        {
            _logger.Warn($"[A2UI] ActionStatus listener threw: {ex.Message}");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Always-available transport that only writes the action to the log. Meant
/// as the last entry in the transport list so that, when no real transport is
/// available (e.g. MCP-only nodes during development), interactions leave a
/// trace instead of vanishing silently.
/// </summary>
public sealed class LoggingActionTransport : IA2UIActionTransport
{
    private readonly IOpenClawLogger _logger;

    /// <summary>Creates the transport.</summary>
    /// <param name="logger">Destination for the action log lines.</param>
    public LoggingActionTransport(IOpenClawLogger logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// When true, the log line also contains the full serialized envelope,
    /// action context included. Off by default because the context can hold
    /// user-typed form values that the spec treats as privacy-relevant.
    /// </summary>
    public bool LogFullEnvelope { get; set; }

    /// <summary>Always <c>true</c>; logging needs no connection.</summary>
    public bool IsAvailable => true;

    /// <summary>Logs <paramref name="action"/> and completes synchronously.</summary>
    public Task DeliverAsync(Protocol.A2UIAction action)
    {
        // Default form logs identifiers only, keeping form/PII data from the
        // action context out of the log file.
        var line = LogFullEnvelope
            ? $"[A2UI] action '{action.Name}' from {action.SourceComponentId ?? "?"} on surface '{action.SurfaceId}' (no remote sink): {A2UIActionEnvelope.Serialize(action)}"
            : $"[A2UI] action '{action.Name}' from {action.SourceComponentId ?? "?"} on surface '{action.SurfaceId}' (no remote sink)";
        _logger.Info(line);
        return Task.CompletedTask;
    }
}
|
||||
@ -1,192 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Nodes;
|
||||
using System.Threading.Tasks;
|
||||
using OpenClaw.Shared;
|
||||
using OpenClawTray.A2UI.Protocol;
|
||||
|
||||
namespace OpenClawTray.A2UI.Actions;
|
||||
|
||||
/// <summary>
/// The one exit point for actions leaving the renderer. What happens next
/// (gateway WebSocket, MCP notification, fallback queue) is up to the
/// implementation.
/// </summary>
public interface IActionSink
{
    /// <summary>Hands <paramref name="action"/> to the sink. Returns nothing; the call is fire-and-forget for the caller.</summary>
    void Raise(A2UIAction action);
}
|
||||
|
||||
/// <summary>
/// Routes each raised action to the first transport (in list order) whose
/// <see cref="IA2UIActionTransport.IsAvailable"/> is true and whose
/// <see cref="IA2UIActionTransport.DeliverAsync"/> completes without throwing.
/// Actions that no transport accepts are parked in a bounded in-memory queue;
/// the queue is retried, oldest first, at the start of the next send.
/// Repeats of the same surface/component/action within
/// <see cref="DebounceWindow"/> are dropped.
/// </summary>
public sealed class ActionDispatcher : IActionSink, IDisposable
{
    /// <summary>Cap for the debounce dictionary. Sweeps entries older than <see cref="DebounceWindow"/> once exceeded.</summary>
    internal const int MaxDebounceEntries = 256;

    /// <summary>Cap for the fallback queue. Drops oldest on overflow so the newest action still ships.</summary>
    internal const int MaxFallbackQueue = 200;

    private static readonly TimeSpan DebounceWindow = TimeSpan.FromMilliseconds(200);

    private readonly IReadOnlyList<IA2UIActionTransport> _transports;
    private readonly IOpenClawLogger _logger;
    private readonly ConcurrentQueue<A2UIAction> _fallback = new();
    private readonly Dictionary<string, DateTimeOffset> _lastDelivery = new();
    private readonly object _debounceLock = new();
    private readonly System.Threading.SemaphoreSlim _sendGate = new(1, 1);

    // volatile: written by Dispose, read by Raise, possibly on different threads.
    private volatile bool _disposed;

    /// <summary>Creates a dispatcher over <paramref name="transports"/>, tried in list order.</summary>
    /// <param name="transports">Candidate transports; the dispatcher does not dispose them.</param>
    /// <param name="logger">Receives warnings about failed transports and queueing.</param>
    public ActionDispatcher(IReadOnlyList<IA2UIActionTransport> transports, IOpenClawLogger logger)
    {
        _transports = transports;
        _logger = logger;
    }

    /// <summary>
    /// Starts delivery of <paramref name="action"/> without waiting for it.
    /// Does nothing once the dispatcher is disposed or when the action is
    /// debounced.
    /// </summary>
    public void Raise(A2UIAction action)
    {
        // After Dispose the send gate is gone; starting a send would only
        // produce a faulted, unobserved task.
        if (_disposed) return;
        if (IsDebounced(action)) return;
        _ = SendAsync(action);
    }

    /// <summary>
    /// Returns true when an action with the same surface, component and name
    /// was accepted less than <see cref="DebounceWindow"/> ago; otherwise
    /// records the action's timestamp and returns false.
    /// </summary>
    private bool IsDebounced(A2UIAction action)
    {
        var key = $"{action.SurfaceId}|{action.SourceComponentId}|{action.Name}";
        var now = DateTimeOffset.UtcNow;
        lock (_debounceLock)
        {
            if (_lastDelivery.TryGetValue(key, out var last) && (now - last) < DebounceWindow)
                return true;
            _lastDelivery[key] = now;

            // Sweep stale entries when the dict gets large. Keeps memory bounded
            // even when the agent emits actions with constantly-changing keys.
            if (_lastDelivery.Count > MaxDebounceEntries)
            {
                var cutoff = now - DebounceWindow;
                var stale = new List<string>();
                foreach (var kv in _lastDelivery)
                    if (kv.Value < cutoff) stale.Add(kv.Key);
                foreach (var k in stale) _lastDelivery.Remove(k);

                // If the sweep didn't reclaim enough, evict arbitrarily — this
                // only affects debounce, not delivery semantics.
                if (_lastDelivery.Count > MaxDebounceEntries)
                {
                    int over = _lastDelivery.Count - MaxDebounceEntries;
                    var toRemove = new List<string>(over);
                    foreach (var k in _lastDelivery.Keys) { toRemove.Add(k); if (toRemove.Count >= over) break; }
                    foreach (var k in toRemove) _lastDelivery.Remove(k);
                }
            }
        }
        return false;
    }

    /// <summary>
    /// Delivers the queued backlog and then <paramref name="action"/>, one
    /// sender at a time. An undeliverable action is queued (dropping the
    /// oldest entry when the queue is full).
    /// </summary>
    private async Task SendAsync(A2UIAction action)
    {
        // Single-flight send loop. Without this, two concurrent Raise calls each
        // try to drain _fallback, racing on TryPeek/TryDequeue and producing
        // out-of-order delivery under contention. (Unified review M8.)
        try
        {
            await _sendGate.WaitAsync().ConfigureAwait(false);
        }
        catch (ObjectDisposedException)
        {
            // Dispose won the race against Raise's _disposed check; drop the action.
            return;
        }

        try
        {
            // Drain any backlog first so order is preserved.
            while (_fallback.TryPeek(out var pending))
            {
                if (await TryDeliverAsync(pending))
                {
                    _fallback.TryDequeue(out _);
                }
                else
                {
                    break;
                }
            }

            if (!await TryDeliverAsync(action))
            {
                if (_fallback.Count >= MaxFallbackQueue)
                {
                    // Drop the oldest queued action so the newest still has a slot.
                    if (_fallback.TryDequeue(out var dropped))
                        _logger.Warn($"[A2UI] fallback queue full; dropped oldest action '{dropped.Name}' on '{dropped.SurfaceId}'");
                }
                _logger.Warn($"[A2UI] No transport available; queued action '{action.Name}' for later delivery.");
                _fallback.Enqueue(action);
            }
        }
        finally
        {
            try
            {
                _sendGate.Release();
            }
            catch (ObjectDisposedException)
            {
                // Disposed while this send was in flight; nothing left to release.
            }
        }
    }

    /// <summary>
    /// Tries the transports in order. A transport that is unavailable is
    /// skipped; one that throws is logged and the next is tried.
    /// </summary>
    /// <returns><c>true</c> as soon as one transport delivers; <c>false</c> if none did.</returns>
    private async Task<bool> TryDeliverAsync(A2UIAction action)
    {
        foreach (var t in _transports)
        {
            if (!t.IsAvailable) continue;
            try
            {
                await t.DeliverAsync(action);
                return true;
            }
            catch (Exception ex)
            {
                _logger.Warn($"[A2UI] Transport '{t.GetType().Name}' failed: {ex.Message}");
            }
        }
        return false;
    }

    /// <summary>
    /// Releases the send gate. Safe to call more than once. Actions raised
    /// afterwards are ignored.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;
        // Surface rebuilds drop the dispatcher reference, so without an
        // explicit Dispose the semaphore's resources linger until GC.
        // Disposable transports are the responsibility of whoever constructed
        // them.
        try { _sendGate.Dispose(); } catch { /* ignore */ }
    }
}
|
||||
|
||||
/// <summary>
/// One concrete delivery channel for A2UI actions.
/// </summary>
public interface IA2UIActionTransport
{
    /// <summary>Whether the channel can currently take an action.</summary>
    bool IsAvailable { get; }

    /// <summary>
    /// Delivers <paramref name="action"/>. Implementations signal failure by
    /// throwing.
    /// </summary>
    Task DeliverAsync(A2UIAction action);
}
|
||||
|
||||
/// <summary>
/// Shared serialization of an action into the v0.8 client→server envelope:
/// <c>{ "action": { name, surfaceId, timestamp, sourceComponentId?, context? } }</c>.
/// </summary>
public static class A2UIActionEnvelope
{
    // Compact output; null-valued members are omitted.
    private static readonly JsonSerializerOptions s_options = new()
    {
        WriteIndented = false,
        DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull,
    };

    /// <summary>
    /// Builds the envelope object for <paramref name="a"/>.
    /// <c>sourceComponentId</c> is included only when non-empty and
    /// <c>context</c> only when present; the context is deep-cloned so the
    /// envelope does not share nodes with the action.
    /// </summary>
    public static JsonObject ToEnvelope(A2UIAction a)
    {
        var body = new JsonObject();
        body["name"] = a.Name;
        body["surfaceId"] = a.SurfaceId;
        // "o" is the round-trip format specifier.
        body["timestamp"] = a.Timestamp.ToString("o");

        if (!string.IsNullOrEmpty(a.SourceComponentId))
        {
            body["sourceComponentId"] = a.SourceComponentId;
        }

        if (a.Context != null)
        {
            body["context"] = (JsonNode)a.Context.DeepClone();
        }

        var envelope = new JsonObject();
        envelope["action"] = body;
        return envelope;
    }

    /// <summary>Serializes the envelope for <paramref name="a"/> to a compact JSON string.</summary>
    public static string Serialize(A2UIAction a)
    {
        return ToEnvelope(a).ToJsonString(s_options);
    }
}
|
||||
@ -1,405 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Text.Json.Nodes;
|
||||
using Microsoft.UI.Dispatching;
|
||||
|
||||
namespace OpenClawTray.A2UI.DataModel;
|
||||
|
||||
/// <summary>
/// Per-surface <see cref="JsonObject"/> store, mutated via JSON Pointer
/// (RFC 6901) writes. Subscribers registered through
/// <see cref="DataModelObservable"/> are notified when paths change. The
/// surface table is guarded by one lock; each surface's tree has its own.
/// </summary>
public sealed class DataModelStore
{
    // Per-update caps. Bounded so an adversarial dataModelUpdate can't drive the
    // UI thread into an OOM or a million-entry loop. Sized to dwarf realistic
    // catalogs while still rejecting obvious abuse.
    internal const int MaxEntriesPerUpdate = 1024;
    internal const int MaxValueMapDepth = 32;
    internal const int MaxKeyLength = 256;
    internal const int MaxStringValueLength = 64 * 1024;

    /// <summary>
    /// Highest array length a pointer write may create. Writing to index N
    /// pads the array with nulls up to N, so without a bound a single pointer
    /// such as <c>/list/2000000000</c> would allocate billions of slots.
    /// </summary>
    internal const int MaxArrayLength = 16 * 1024;

    private readonly object _lock = new();
    private readonly Dictionary<string, SurfaceModel> _surfaces = new(StringComparer.Ordinal);
    private readonly DispatcherQueue _dispatcher;

    /// <summary>Creates an empty store.</summary>
    /// <param name="dispatcher">Dispatcher handed to every <see cref="DataModelObservable"/> the store creates.</param>
    public DataModelStore(DispatcherQueue dispatcher)
    {
        _dispatcher = dispatcher;
    }

    /// <summary>
    /// Returns an observable view of the surface's model, creating the model
    /// if needed. <paramref name="seed"/> (deep-cloned) is used only when the
    /// model is created; it is ignored for an existing surface.
    /// </summary>
    public DataModelObservable GetOrCreate(string surfaceId, JsonObject? seed = null)
    {
        lock (_lock)
        {
            if (!_surfaces.TryGetValue(surfaceId, out var model))
            {
                model = new SurfaceModel(seed != null ? (JsonObject)seed.DeepClone() : new JsonObject());
                _surfaces[surfaceId] = model;
            }
            return new DataModelObservable(model, _dispatcher);
        }
    }

    /// <summary>
    /// Replaces the surface's whole tree with a clone of
    /// <paramref name="seed"/> (or an empty object) and notifies every
    /// subscriber. An unknown surface is simply created; nothing is notified.
    /// </summary>
    public void Reset(string surfaceId, JsonObject? seed = null)
    {
        SurfaceModel? model;
        lock (_lock)
        {
            if (!_surfaces.TryGetValue(surfaceId, out model))
            {
                model = new SurfaceModel(seed != null ? (JsonObject)seed.DeepClone() : new JsonObject());
                _surfaces[surfaceId] = model;
                return;
            }
            model.Replace(seed != null ? (JsonObject)seed.DeepClone() : new JsonObject());
        }
        // Notify outside the store lock.
        new DataModelObservable(model, _dispatcher).NotifyAllPaths();
    }

    /// <summary>Forgets the surface's model, if any.</summary>
    public void Remove(string surfaceId)
    {
        lock (_lock) { _surfaces.Remove(surfaceId); }
    }

    /// <summary>Forgets every surface's model.</summary>
    public void RemoveAll()
    {
        lock (_lock) { _surfaces.Clear(); }
    }

    /// <summary>
    /// Apply a v0.8 dataModelUpdate batch. Each entry's <c>key</c> is appended
    /// to the optional <paramref name="basePath"/> to form the full pointer.
    /// Special case: basePath="/" or null with key="" replaces the whole tree.
    /// Notifications are sent once, after the whole batch has been applied.
    /// A batch larger than <see cref="MaxEntriesPerUpdate"/> is ignored
    /// entirely; an entry that breaks a per-entry cap or whose write throws is
    /// skipped without affecting the rest.
    /// </summary>
    public void ApplyDataModelUpdate(string surfaceId, string? basePath, IReadOnlyList<Protocol.DataModelEntry> entries)
    {
        // Drop oversize batches at the boundary. Smaller-than-cap batches still
        // get per-entry sanity checks below.
        if (entries.Count > MaxEntriesPerUpdate)
            return;

        SurfaceModel model;
        lock (_lock)
        {
            if (!_surfaces.TryGetValue(surfaceId, out var existing))
            {
                existing = new SurfaceModel(new JsonObject());
                _surfaces[surfaceId] = existing;
            }
            model = existing;
        }

        var changed = new List<string>(entries.Count);
        var prefix = NormalizePath(basePath ?? "/");
        if (prefix == "/") prefix = "";

        foreach (var entry in entries)
        {
            // Per-entry caps: drop the entry rather than aborting the whole batch
            // (consistent with the "skip bad pointer" tolerance below).
            if (entry.Key.Length > MaxKeyLength) continue;
            if (entry.ValueString != null && entry.ValueString.Length > MaxStringValueLength) continue;
            if (!IsWithinDepth(entry.ValueMap, depth: 1, max: MaxValueMapDepth)) continue;

            try
            {
                var pointer = string.IsNullOrEmpty(entry.Key)
                    ? (string.IsNullOrEmpty(prefix) ? "/" : prefix)
                    : prefix + "/" + EncodePointerToken(entry.Key);
                // SetByPointer takes the SurfaceModel.Sync lock internally.
                model.SetByPointer(pointer, entry.ToJsonNode());
                changed.Add(NormalizePath(pointer));
            }
            catch (Exception)
            {
                // Bad pointer (including an array index over MaxArrayLength); skip.
            }
        }

        if (changed.Count > 0)
            new DataModelObservable(model, _dispatcher).NotifyPaths(changed);
    }

    /// <summary>True when the nested value map is at most <paramref name="max"/> levels deep.</summary>
    private static bool IsWithinDepth(IReadOnlyList<Protocol.DataModelEntry>? map, int depth, int max)
    {
        if (map == null) return true;
        if (depth > max) return false;
        foreach (var e in map)
            if (!IsWithinDepth(e.ValueMap, depth + 1, max)) return false;
        return true;
    }

    /// <summary>
    /// RFC 6901 token escape: <c>~</c> → <c>~0</c>, <c>/</c> → <c>~1</c>. The
    /// caller's <c>entry.Key</c> is treated as a single pointer reference token,
    /// so a key like <c>"users/0/name"</c> escapes to one segment
    /// <c>users~10~1name</c> — it does NOT split into nested path segments.
    /// Use <c>basePath</c> to traverse into nested objects.
    /// </summary>
    private static string EncodePointerToken(string key) =>
        key.Replace("~", "~0").Replace("/", "~1");

    /// <summary>
    /// Reads the node at <paramref name="pointer"/> for the given surface.
    /// Returns null for an unknown surface, an unresolvable pointer, or when
    /// the lookup throws.
    /// </summary>
    public JsonNode? Read(string surfaceId, string pointer)
    {
        SurfaceModel? model;
        lock (_lock)
        {
            if (!_surfaces.TryGetValue(surfaceId, out model)) return null;
        }
        try { return model.GetByPointer(pointer); } catch { return null; }
    }

    /// <summary>Ensures a leading slash; null/empty becomes <c>"/"</c>.</summary>
    private static string NormalizePath(string p) =>
        string.IsNullOrEmpty(p) ? "/" : (p[0] == '/' ? p : "/" + p);

    /// <summary>Internal mutable holder; shared between observable views.</summary>
    internal sealed class SurfaceModel
    {
        // Per-model lock guarding Root and any traversal/mutation. JsonObject
        // and JsonArray are not thread-safe, so every read AND every write must
        // go through this lock — including the deep-clone in canvas.a2ui.dump.
        public readonly object Sync = new();

        // Single dictionary keyed by normalized pointer path → list of subscribers.
        public readonly Dictionary<string, List<Action>> Subscribers = new(StringComparer.Ordinal);

        public JsonObject Root { get; private set; }

        public SurfaceModel(JsonObject root) { Root = root; }

        /// <summary>Swaps in a new root under the model lock.</summary>
        public void Replace(JsonObject newRoot) { lock (Sync) { Root = newRoot; } }

        /// <summary>
        /// Returns the node at <paramref name="pointer"/>; the root for
        /// null, empty or <c>"/"</c>; null when the path does not resolve.
        /// Never creates nodes.
        /// </summary>
        public JsonNode? GetByPointer(string pointer)
        {
            lock (Sync)
            {
                if (string.IsNullOrEmpty(pointer) || pointer == "/")
                    return Root;
                var (parent, key, isIndex, idx) = Resolve(pointer, createMissing: false);
                if (parent == null) return null;
                if (parent is JsonObject po) return po[key!];
                if (parent is JsonArray pa) return isIndex && idx >= 0 && idx < pa.Count ? pa[idx] : null;
                return null;
            }
        }

        /// <summary>
        /// Writes <paramref name="value"/> at <paramref name="pointer"/>,
        /// creating missing intermediate containers. Arrays are padded with
        /// nulls up to the target index.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The target is an array and the final token is not an index in
        /// <c>[0, MaxArrayLength)</c>, or an intermediate index reaches the cap.
        /// </exception>
        public void SetByPointer(string pointer, JsonNode? value)
        {
            lock (Sync)
            {
                if (string.IsNullOrEmpty(pointer) || pointer == "/")
                {
                    if (value is JsonObject obj) Root = obj;
                    else if (value != null)
                    {
                        // Whole-tree replace requires an object root. Coerce a
                        // scalar/array into { "value": <scalar> } rather than
                        // silently dropping the write — the previous no-op
                        // behaviour masked agent bugs.
                        Root = new JsonObject { ["value"] = value.DeepClone() };
                    }
                    return;
                }
                var (parent, key, isIndex, idx) = Resolve(pointer, createMissing: true);
                if (parent is JsonObject po)
                {
                    po[key!] = value;
                }
                else if (parent is JsonArray pa)
                {
                    // Validate BEFORE padding: a huge index would otherwise
                    // append nulls until memory runs out. Negative and
                    // non-numeric tokens are rejected with the same exception
                    // type the array indexer would raise.
                    if (!isIndex || idx < 0 || idx >= MaxArrayLength)
                        throw new ArgumentOutOfRangeException(
                            nameof(pointer),
                            $"Array index must be in [0, {MaxArrayLength}).");
                    while (pa.Count <= idx) pa.Add(null);
                    pa[idx] = value;
                }
            }
        }

        /// <summary>Atomic deep-clone of the root JsonObject. Required for snapshot/dump consumers.</summary>
        public JsonObject CloneRoot()
        {
            lock (Sync) { return (JsonObject)Root.DeepClone(); }
        }

        /// <summary>
        /// Walks all but the last token of <paramref name="pointer"/> and
        /// returns the container the last token addresses, plus that token
        /// (and its parsed index when the container is an array). Returns a
        /// null parent when the path cannot be followed. Caller must hold
        /// <see cref="Sync"/>.
        /// </summary>
        private (JsonNode? parent, string? key, bool isIndex, int idx) Resolve(string pointer, bool createMissing)
        {
            var tokens = SplitPointer(pointer);
            if (tokens.Count == 0) return (Root, null, false, -1);

            JsonNode? cursor = Root;
            for (int i = 0; i < tokens.Count - 1; i++)
            {
                var tok = tokens[i];
                if (cursor is JsonObject obj)
                {
                    if (obj[tok] == null)
                    {
                        if (!createMissing) return (null, null, false, -1);
                        // Numeric next token => the missing container is an array.
                        var nextIsIndex = int.TryParse(tokens[i + 1], out _);
                        obj[tok] = nextIsIndex ? new JsonArray() : new JsonObject();
                    }
                    cursor = obj[tok];
                }
                else if (cursor is JsonArray arr)
                {
                    if (!int.TryParse(tok, out var ai)) return (null, null, false, -1);
                    // Same bound as the final-token write: never pad past the cap.
                    if (createMissing && ai >= MaxArrayLength)
                        throw new ArgumentOutOfRangeException(
                            nameof(pointer),
                            $"Array index must be in [0, {MaxArrayLength}).");
                    while (createMissing && arr.Count <= ai) arr.Add(null);
                    if (ai < 0 || ai >= arr.Count) return (null, null, false, -1);
                    cursor = arr[ai];
                }
                else
                {
                    return (null, null, false, -1);
                }
            }

            var last = tokens[^1];
            if (cursor is JsonArray finalArr && int.TryParse(last, out var idx))
                return (finalArr, last, true, idx);
            return (cursor, last, false, -1);
        }

        /// <summary>Splits a pointer on <c>/</c> and unescapes each token (<c>~1</c> → <c>/</c>, then <c>~0</c> → <c>~</c>).</summary>
        private static List<string> SplitPointer(string pointer)
        {
            var p = pointer.StartsWith('/') ? pointer.Substring(1) : pointer;
            var parts = p.Split('/');
            var result = new List<string>(parts.Length);
            foreach (var part in parts)
                result.Add(part.Replace("~1", "/").Replace("~0", "~"));
            return result;
        }
    }
}
|
||||
|
||||
/// <summary>
|
||||
/// View on a SurfaceModel that exposes per-path INotifyPropertyChanged-style
|
||||
/// callbacks for binding into XAML. Multiple instances may share the same model.
|
||||
/// </summary>
|
||||
public sealed class DataModelObservable
|
||||
{
|
||||
private readonly DataModelStore.SurfaceModel _model;
|
||||
private readonly DispatcherQueue _dispatcher;
|
||||
|
||||
internal DataModelObservable(DataModelStore.SurfaceModel model, DispatcherQueue dispatcher)
|
||||
{
|
||||
_model = model;
|
||||
_dispatcher = dispatcher;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Direct reference to the root object. Callers MUST NOT mutate or
|
||||
/// enumerate concurrently with writes; prefer <see cref="CloneRoot"/> for
|
||||
/// any consumer that needs a stable view.
|
||||
/// </summary>
|
||||
public JsonObject Root => _model.Root;
|
||||
|
||||
/// <summary>Atomic deep-clone of the root object. Safe to enumerate off-dispatcher.</summary>
|
||||
public JsonObject CloneRoot() => _model.CloneRoot();
|
||||
|
||||
public JsonNode? Read(string pointer) => _model.GetByPointer(pointer);
|
||||
|
||||
public string? ReadString(string pointer)
|
||||
{
|
||||
var node = Read(pointer);
|
||||
if (node is JsonValue v && v.TryGetValue<string>(out var s)) return s;
|
||||
return node?.ToString();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Two-way write: updates the data model, notifies subscribers on the dispatcher.
|
||||
/// </summary>
|
||||
public void Write(string pointer, JsonNode? value)
|
||||
{
|
||||
try
|
||||
{
|
||||
_model.SetByPointer(pointer, value);
|
||||
NotifyPaths(new[] { Normalize(pointer) });
|
||||
}
|
||||
catch { /* swallow; bad pointer */ }
|
||||
}
|
||||
|
||||
/// <summary>
/// Registers <paramref name="callback"/> for changes on the JSON Pointer
/// <paramref name="pointer"/>. The callback is invoked on the dispatcher thread.
/// </summary>
/// <returns>A handle whose disposal removes the registration.</returns>
public IDisposable Subscribe(string pointer, Action callback)
{
    string normalized = Normalize(pointer);

    lock (_model.Subscribers)
    {
        if (_model.Subscribers.TryGetValue(normalized, out var listeners))
        {
            listeners.Add(callback);
        }
        else
        {
            // First subscriber for this path: create its bucket.
            _model.Subscribers[normalized] = new List<Action> { callback };
        }
    }

    return new Subscription(_model, normalized, callback);
}
|
||||
|
||||
/// <summary>
/// Fires the subscribers of every path in <paramref name="paths"/> and of each
/// ancestor path up to "/". A given callback delegate is dispatched at most
/// once per call, even when it is reachable from several paths.
/// </summary>
internal void NotifyPaths(IEnumerable<string> paths)
{
    var alreadyFired = new HashSet<Action>();

    foreach (var path in paths)
    {
        // Start at the changed path and climb one segment at a time.
        string cursor = Normalize(path);
        for (;;)
        {
            // Copy the bucket under the lock; callbacks run outside it.
            List<Action>? listeners = null;
            lock (_model.Subscribers)
            {
                if (_model.Subscribers.TryGetValue(cursor, out var registered) && registered != null)
                {
                    listeners = new List<Action>(registered);
                }
            }

            if (listeners != null)
            {
                foreach (var listener in listeners)
                {
                    if (!alreadyFired.Add(listener))
                    {
                        continue;
                    }
                    Dispatch(listener);
                }
            }

            if (cursor == "/" || string.IsNullOrEmpty(cursor))
            {
                break;
            }

            int lastSlash = cursor.LastIndexOf('/');
            cursor = lastSlash > 0 ? cursor.Substring(0, lastSlash) : "/";
        }
    }
}
|
||||
|
||||
/// <summary>
/// Dispatches every registered callback, regardless of path. The subscriber
/// table is copied under its lock and the callbacks are dispatched afterwards.
/// </summary>
internal void NotifyAllPaths()
{
    var snapshot = new List<Action>();

    lock (_model.Subscribers)
    {
        foreach (var bucket in _model.Subscribers)
        {
            snapshot.AddRange(bucket.Value);
        }
    }

    for (int i = 0; i < snapshot.Count; i++)
    {
        Dispatch(snapshot[i]);
    }
}
|
||||
|
||||
/// <summary>
/// Runs <paramref name="callback"/> inline when there is no dispatcher or the
/// caller already has dispatcher thread access; otherwise enqueues it.
/// Exceptions thrown by the callback are discarded in both cases.
/// </summary>
private void Dispatch(Action callback)
{
    void InvokeQuietly()
    {
        try { callback(); } catch { }
    }

    bool runInline = _dispatcher == null || _dispatcher.HasThreadAccess;
    if (runInline)
    {
        InvokeQuietly();
        return;
    }

    _dispatcher.TryEnqueue(() => InvokeQuietly());
}
|
||||
|
||||
/// <summary>
/// Canonicalises a pointer for use as a subscriber key: null or empty becomes
/// "/", and a pointer without a leading slash gets one prepended.
/// </summary>
private static string Normalize(string p)
{
    if (string.IsNullOrEmpty(p))
    {
        return "/";
    }

    if (p[0] != '/')
    {
        return "/" + p;
    }

    return p;
}
|
||||
|
||||
/// <summary>
/// Handle for one (path, callback) registration in the model's subscriber
/// table. Disposing removes that registration; disposing again is a no-op.
/// </summary>
private sealed class Subscription : IDisposable
{
    private readonly DataModelStore.SurfaceModel _model;
    private readonly string _key;
    private readonly Action _cb;

    // Guarded by lock (_model.Subscribers).
    private bool _disposed;

    public Subscription(DataModelStore.SurfaceModel m, string k, Action c)
    {
        _model = m;
        _key = k;
        _cb = c;
    }

    /// <summary>Removes the callback from its path bucket and drops the bucket when it becomes empty.</summary>
    public void Dispose()
    {
        lock (_model.Subscribers)
        {
            // List.Remove deletes the first matching delegate. Without this
            // guard a second Dispose could unregister a different, still-live
            // subscription that uses the same callback on the same path.
            if (_disposed)
            {
                return;
            }
            _disposed = true;

            if (!_model.Subscribers.TryGetValue(_key, out var list))
            {
                return;
            }

            list.Remove(_cb);
            if (list.Count == 0)
            {
                _model.Subscribers.Remove(_key);
            }
        }
    }
}
|
||||
}
|
||||
@ -1,199 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.UI.Dispatching;
|
||||
using OpenClaw.Shared;
|
||||
using OpenClawTray.A2UI.Actions;
|
||||
using OpenClawTray.A2UI.DataModel;
|
||||
using OpenClawTray.A2UI.Protocol;
|
||||
using OpenClawTray.A2UI.Rendering;
|
||||
using OpenClawTray.A2UI.Telemetry;
|
||||
using OpenClawTray.A2UI.Theming;
|
||||
|
||||
namespace OpenClawTray.A2UI.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// Stateful per-window router. Parses inbound JSONL, dispatches to surface
|
||||
/// hosts on the UI thread, and exposes events for the window to react to
|
||||
/// (surface lifecycle).
|
||||
///
|
||||
/// Designed for single-window/multiple-surfaces — the spec leaves room for a
|
||||
/// future multi-window mode but the v1 host stacks surfaces in TabView slots.
|
||||
/// </summary>
|
||||
public sealed class A2UIRouter
|
||||
{
|
||||
/// <summary>
|
||||
/// Cap on concurrent surfaces. v0.8 deployments stack a small number of
|
||||
/// related surfaces per window; this bound keeps an adversarial agent from
|
||||
/// driving the host to OOM by creating unique surface IDs in a loop.
|
||||
/// </summary>
|
||||
internal const int MaxSurfaces = 64;
|
||||
|
||||
private readonly DispatcherQueue _dispatcher;
|
||||
private readonly DataModelStore _dataModel;
|
||||
private readonly ComponentRendererRegistry _registry;
|
||||
private readonly IActionSink _actions;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly IA2UITelemetry _telemetry;
|
||||
private readonly Dictionary<string, SurfaceHost> _surfaces = new(StringComparer.Ordinal);
|
||||
|
||||
public event EventHandler<SurfaceHost>? SurfaceCreated;
|
||||
public event EventHandler<SurfaceHost>? SurfaceRendered;
|
||||
public event EventHandler<string>? SurfaceDeleted;
|
||||
|
||||
/// <summary>
/// Wires the router to its collaborators. When <paramref name="telemetry"/>
/// is omitted, <c>NullA2UITelemetry.Instance</c> is used instead.
/// </summary>
public A2UIRouter(
    DispatcherQueue dispatcher,
    DataModelStore dataModel,
    ComponentRendererRegistry registry,
    IActionSink actions,
    IOpenClawLogger logger,
    IA2UITelemetry? telemetry = null)
{
    (_dispatcher, _dataModel, _registry) = (dispatcher, dataModel, registry);
    (_actions, _logger) = (actions, logger);

    if (telemetry is null)
    {
        _telemetry = NullA2UITelemetry.Instance;
    }
    else
    {
        _telemetry = telemetry;
    }
}
|
||||
|
||||
/// <summary>
/// Read-only view over the router's live surface dictionary (not a copy).
/// Do not enumerate it while the router may be applying messages; call
/// <see cref="SnapshotSurfaces"/> for a stable list.
/// </summary>
public IReadOnlyDictionary<string, SurfaceHost> Surfaces
{
    get { return _surfaces; }
}
|
||||
|
||||
/// <summary>
/// Copies the current (id, host) pairs into a new list that is independent of
/// later router activity and therefore safe to enumerate.
/// </summary>
public IReadOnlyList<KeyValuePair<string, SurfaceHost>> SnapshotSurfaces()
{
    var snapshot = new List<KeyValuePair<string, SurfaceHost>>(_surfaces.Count);
    snapshot.AddRange(_surfaces);
    return snapshot;
}
|
||||
|
||||
/// <summary>
/// Feeds a JSONL blob to the router. The parser yields one message per parsed
/// line, and each message is dispatched to the UI thread individually.
/// </summary>
public void Push(string jsonl)
{
    var messages = A2UIMessageParser.Parse(jsonl, _logger);
    foreach (var message in messages)
    {
        DispatchOnUI(message);
    }
}
|
||||
|
||||
/// <summary>
/// Tears down every surface on the UI thread: disposes each host, clears the
/// data model store, and raises <see cref="SurfaceDeleted"/> once per removed
/// surface.
/// </summary>
/// <remarks>
/// The surface table is snapshotted and cleared before any host or handler
/// code runs. A <see cref="SurfaceDeleted"/> handler that pushes new messages
/// synchronously would otherwise mutate the dictionary mid-enumeration and
/// abort the reset with an <see cref="InvalidOperationException"/>. Handler
/// and dispose failures are logged and do not stop the remaining surfaces
/// from being processed.
/// </remarks>
public void ResetAll()
{
    DispatchToUI(() =>
    {
        var removed = new List<SurfaceHost>(_surfaces.Values);
        _surfaces.Clear();

        foreach (var host in removed)
        {
            try { host.Dispose(); }
            catch (Exception ex) { _logger.Error("[A2UI] surface dispose failed during reset", ex); }
        }

        _dataModel.RemoveAll();

        // Notify only after all state is gone, so a re-entrant handler
        // observes (and may repopulate) a clean router.
        foreach (var host in removed)
        {
            try { SurfaceDeleted?.Invoke(this, host.SurfaceId); }
            catch (Exception ex) { _logger.Error("[A2UI] SurfaceDeleted handler failed during reset", ex); }
        }

        _logger.Info("[A2UI] reset all surfaces");
    });
}
|
||||
|
||||
/// <summary>
/// Applies <paramref name="msg"/> on the UI thread. Any exception thrown while
/// applying is logged as an error and does not propagate.
/// </summary>
private void DispatchOnUI(A2UIMessage msg)
{
    void ApplyGuarded()
    {
        try
        {
            Apply(msg);
        }
        catch (Exception ex)
        {
            _logger.Error("[A2UI] Router apply failed", ex);
        }
    }

    DispatchToUI(ApplyGuarded);
}
|
||||
|
||||
/// <summary>
/// Runs <paramref name="action"/> inline when the caller already has
/// dispatcher thread access, otherwise enqueues it on the dispatcher.
/// </summary>
private void DispatchToUI(Action action)
{
    if (!_dispatcher.HasThreadAccess)
    {
        // A false return from TryEnqueue means the work item was not queued
        // (for example, the dispatcher is shutting down). Upstream callers
        // have already reported success on the wire, so leave a warning
        // behind rather than dropping the update without a trace.
        bool queued = _dispatcher.TryEnqueue(() => action());
        if (!queued)
        {
            _logger.Warn("[A2UI] Router dispatch dropped: dispatcher unavailable (likely shutting down)");
        }
        return;
    }

    action();
}
|
||||
|
||||
/// <summary>
/// Applies one parsed message to router state: creates or updates surfaces,
/// forwards data-model updates to the store, deletes surfaces, and logs
/// unknown envelopes. Each handled message is also reported to telemetry.
/// </summary>
/// <remarks>
/// Message types not listed in the switch are ignored. Agent-supplied strings
/// are passed through <c>LogSafe</c> before being logged.
/// </remarks>
private void Apply(A2UIMessage msg)
{
    switch (msg)
    {
        case SurfaceUpdateMessage su:
        {
            var host = GetOrCreateSurface(su.SurfaceId);
            if (host == null) break; // cap reached; logged inside GetOrCreateSurface
            host.ApplyComponents(su.Components);
            _logger.Info($"[A2UI] surfaceUpdate '{LogSafe(su.SurfaceId)}' ({su.Components.Count} component(s))");
            _telemetry.Push(su.SurfaceId, "surfaceUpdate", su.Components.Count);
            break;
        }

        case BeginRenderingMessage br:
        {
            var host = GetOrCreateSurface(br.SurfaceId);
            if (host == null) break;
            host.BeginRendering(br.Root, br.Styles);
            SurfaceRendered?.Invoke(this, host);
            // LogSafe returns an empty string (never null) for missing input,
            // so the default label has to be chosen before sanitising.
            var catalog = string.IsNullOrEmpty(br.CatalogId) ? "default" : LogSafe(br.CatalogId);
            _logger.Info($"[A2UI] beginRendering '{LogSafe(br.SurfaceId)}' root='{LogSafe(br.Root)}' (catalog={catalog})");
            _telemetry.Push(br.SurfaceId, "beginRendering", 1);
            break;
        }

        case DataModelUpdateMessage dmu:
        {
            _dataModel.ApplyDataModelUpdate(dmu.SurfaceId, dmu.Path, dmu.Contents);
            // Same reasoning as the catalog label: pick the fallback first.
            var path = string.IsNullOrEmpty(dmu.Path) ? "/" : LogSafe(dmu.Path);
            _logger.Debug($"[A2UI] dataModelUpdate '{LogSafe(dmu.SurfaceId)}' path='{path}' ({dmu.Contents.Count} entry(ies))");
            _telemetry.Push(dmu.SurfaceId, "dataModelUpdate", dmu.Contents.Count);
            break;
        }

        case DeleteSurfaceMessage ds:
        {
            // Deleting an unknown surface is a silent no-op.
            if (_surfaces.TryGetValue(ds.SurfaceId, out var existing))
            {
                existing.Dispose();
                _surfaces.Remove(ds.SurfaceId);
                _dataModel.Remove(ds.SurfaceId);
                SurfaceDeleted?.Invoke(this, ds.SurfaceId);
                _logger.Info($"[A2UI] deleteSurface '{LogSafe(ds.SurfaceId)}'");
                _telemetry.Push(ds.SurfaceId, "deleteSurface", 1);
            }
            break;
        }

        case UnknownEnvelopeMessage ue:
            _logger.Warn($"[A2UI] Unknown envelope kind '{LogSafe(ue.Kind)}'; skipping");
            break;
    }
}
|
||||
|
||||
/// <summary>
/// Returns the host registered for <paramref name="surfaceId"/>, creating and
/// registering one (and raising <see cref="SurfaceCreated"/>) when it is new.
/// </summary>
/// <returns>
/// The host, or <c>null</c> when the id is new and <see cref="MaxSurfaces"/>
/// hosts already exist.
/// </returns>
private SurfaceHost? GetOrCreateSurface(string surfaceId)
{
    if (!_surfaces.TryGetValue(surfaceId, out var host))
    {
        if (_surfaces.Count >= MaxSurfaces)
        {
            // At the cap the message is dropped instead of being redirected
            // to some existing surface: redirecting would overwrite the
            // components of an unrelated surface. The warning below is the
            // signal that an agent is misbehaving.
            _logger.Warn($"[A2UI] surface cap ({MaxSurfaces}) reached; dropping push for new surface '{LogSafe(surfaceId)}'");
            return null;
        }

        host = new SurfaceHost(surfaceId, _dataModel.GetOrCreate(surfaceId), _registry, _actions, _logger);
        _surfaces[surfaceId] = host;
        SurfaceCreated?.Invoke(this, host);
    }

    return host;
}
|
||||
|
||||
/// <summary>
/// Makes an agent-supplied string safe to embed in a single log line: caps it
/// at 64 UTF-16 code units and replaces every control character (plus the
/// Unicode line/paragraph separators) with a space.
/// </summary>
/// <param name="s">Text to sanitise; may be null.</param>
/// <returns>The sanitised text, or an empty string for null/empty input. Never null.</returns>
private static string LogSafe(string? s)
{
    if (string.IsNullOrEmpty(s)) return string.Empty;

    const int maxLength = 64;
    int length = Math.Min(s.Length, maxLength);

    // Do not cut a surrogate pair in half; a lone high surrogate is not valid text.
    if (length < s.Length && char.IsHighSurrogate(s[length - 1])) length--;

    var buffer = new char[length];
    for (int i = 0; i < length; i++)
    {
        char c = s[i];
        // CR/LF/TAB are control characters too, so they are still covered.
        bool unsafeForLog = char.IsControl(c) || c == '\u2028' || c == '\u2029';
        buffer[i] = unsafeForLog ? ' ' : c;
    }
    return new string(buffer);
}
|
||||
}
|
||||
@ -1,305 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using Microsoft.UI.Xaml;
|
||||
using Microsoft.UI.Xaml.Controls;
|
||||
using Microsoft.UI.Xaml.Media;
|
||||
using OpenClaw.Shared;
|
||||
using OpenClawTray.A2UI.Actions;
|
||||
using OpenClawTray.A2UI.DataModel;
|
||||
using OpenClawTray.A2UI.Protocol;
|
||||
using OpenClawTray.A2UI.Rendering;
|
||||
using OpenClawTray.A2UI.Theming;
|
||||
|
||||
namespace OpenClawTray.A2UI.Hosting;
|
||||
|
||||
/// <summary>
|
||||
/// One per active surface. Owns the component definition table, rebuilds the
|
||||
/// XAML tree from the declared root, and exposes a single root element that
|
||||
/// the canvas window slots into a content host.
|
||||
///
|
||||
/// Lifecycle in v0.8:
|
||||
/// surfaceUpdate (defs come in) → ApplyComponents
|
||||
/// beginRendering (root + style) → BeginRendering, triggers Build
|
||||
/// dataModelUpdate → store applies; subscribed renderers refresh
|
||||
/// A re-issued surfaceUpdate with the same surfaceId patches in place.
|
||||
/// </summary>
|
||||
public sealed class SurfaceHost : IDisposable
|
||||
{
|
||||
// Hard caps that keep an adversarial / buggy agent from collapsing the UI thread.
|
||||
// Cycle and depth guards above; component count guards a million-node fan-out.
|
||||
internal const int MaxRenderDepth = 64;
|
||||
internal const int MaxComponentsPerSurface = 2000;
|
||||
|
||||
private readonly DataModelObservable _dataModel;
|
||||
private readonly ComponentRendererRegistry _registry;
|
||||
private readonly IActionSink _actions;
|
||||
private readonly IOpenClawLogger _logger;
|
||||
private readonly Dictionary<string, IDisposable> _subscriptions = new(StringComparer.Ordinal);
|
||||
private readonly Dictionary<string, A2UIComponentDef> _defs = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _renderingIds = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _secretPaths = new(StringComparer.Ordinal);
|
||||
private readonly Grid _root;
|
||||
private A2UITheme _theme;
|
||||
private string? _rootId;
|
||||
private int _renderDepth;
|
||||
private int _renderCount;
|
||||
private MediaLoadBudget _mediaBudget = new();
|
||||
|
||||
public string SurfaceId { get; }
|
||||
public string? Title { get; private set; }
|
||||
public FrameworkElement RootElement => _root;
|
||||
|
||||
/// <summary>
/// Creates a host for one surface. It starts with an empty theme, no declared
/// root, and a padded <see cref="Grid"/> as its root element. A missing
/// <paramref name="logger"/> is replaced by <c>NullLogger.Instance</c>.
/// </summary>
public SurfaceHost(
    string surfaceId,
    DataModelObservable dataModel,
    ComponentRendererRegistry registry,
    IActionSink actions,
    IOpenClawLogger? logger = null)
{
    SurfaceId = surfaceId;
    (_dataModel, _registry, _actions) = (dataModel, registry, actions);
    _logger = logger ?? NullLogger.Instance;
    _theme = A2UITheme.Empty;

    var container = new Grid();
    container.Padding = new Thickness(16);
    _root = container;
}
|
||||
|
||||
/// <summary>
/// Adds or replaces component definitions. When a root has already been
/// declared, the visual tree is rebuilt, but only if at least one incoming
/// definition is new or differs from the stored one.
/// </summary>
/// <remarks>
/// A surfaceUpdate that re-sends known components verbatim is a no-op, which
/// preserves caret, scroll and tab selection for agents that re-emit the whole
/// surface as their update mechanism. Spec §3.3 calls for full structural
/// diffing (M1 in unified review); this covers only the most common case and
/// does not track individual XAML elements.
/// New ids beyond <see cref="MaxComponentsPerSurface"/> are dropped with a
/// warning; replacing an existing id is always allowed.
/// </remarks>
public void ApplyComponents(IReadOnlyList<A2UIComponentDef> components)
{
    bool anyChanged = false;
    foreach (var def in components)
    {
        bool known = _defs.TryGetValue(def.Id, out var existing);

        if (!known && _defs.Count >= MaxComponentsPerSurface)
        {
            // Cap the table at the same bound the renderer enforces, so a
            // hostile surfaceUpdate cannot fill memory with definitions that
            // would never render. SurfaceId is agent-supplied as well, so it
            // is sanitised like the component id.
            _logger.Warn($"[A2UI] component cap ({MaxComponentsPerSurface}) on surface '{LogSafe(SurfaceId)}'; dropping '{LogSafe(def.Id)}'");
            continue;
        }

        if (known && ComponentsEqual(existing!, def))
        {
            continue; // identical re-send: nothing to do
        }

        _defs[def.Id] = def;
        anyChanged = true;
    }

    if (anyChanged && _rootId != null) Rebuild();
}
|
||||
|
||||
/// <summary>
/// Decides whether two definitions are interchangeable for rendering: same
/// component name (ordinal), same weight, and identical serialized properties.
/// </summary>
private static bool ComponentsEqual(A2UIComponentDef a, A2UIComponentDef b)
{
    bool sameName = string.Equals(a.ComponentName, b.ComponentName, StringComparison.Ordinal);
    if (!sameName)
    {
        return false;
    }

    if (a.Weight != b.Weight)
    {
        return false;
    }

    // Properties are compared through their JSON text. They are small
    // (per-component < a few KiB after the M5 size caps), so serializing
    // costs far less than the XAML rebuild a match avoids.
    string left = a.Properties.ToJsonString();
    string right = b.Properties.ToJsonString();
    return string.Equals(left, right, StringComparison.Ordinal);
}
|
||||
|
||||
/// <summary>
/// Records <paramref name="rootId"/> as the surface root, parses and applies
/// the surface-level <paramref name="styles"/>, and rebuilds immediately.
/// </summary>
public void BeginRendering(string rootId, System.Text.Json.Nodes.JsonObject? styles)
{
    _rootId = rootId;
    _theme = A2UITheme.Parse(styles);

    // In v0.8 the optional surface title travels in the styles bag. Leaving
    // Title null lets the window title default to "Canvas".
    string? declaredTitle = null;
    if (styles is not null
        && styles["title"] is System.Text.Json.Nodes.JsonValue titleNode
        && titleNode.TryGetValue<string>(out var candidate)
        && !string.IsNullOrWhiteSpace(candidate))
    {
        declaredTitle = candidate;
    }
    Title = declaredTitle;

    ApplyThemeToScope(_root, _theme);
    Rebuild();
}
|
||||
|
||||
/// <summary>
/// Releases the surface: disposes the held subscriptions, forgets every
/// component definition, and empties the root element's children.
/// </summary>
public void Dispose()
{
    DisposeSubscriptions();

    _defs.Clear();

    var children = _root.Children;
    children.Clear();
}
|
||||
|
||||
/// <summary>
/// Builds a JSON description of the surface's logical state: surface id,
/// declared root, every component (id, componentName, properties and, when
/// set, weight), and the current data model tree with sensitive paths
/// (obscured fields + denylist matches) redacted. Used by
/// <c>canvas.a2ui.dump</c> for headless verification.
/// </summary>
public System.Text.Json.Nodes.JsonObject GetSnapshot()
{
    var componentList = new System.Text.Json.Nodes.JsonArray();
    foreach (var pair in _defs)
    {
        var def = pair.Value;
        var item = new System.Text.Json.Nodes.JsonObject();
        item["id"] = def.Id;
        item["componentName"] = def.ComponentName;
        item["properties"] = def.Properties.DeepClone();
        if (def.Weight is { } weight)
        {
            item["weight"] = weight;
        }
        componentList.Add(item);
    }

    var snapshot = new System.Text.Json.Nodes.JsonObject();
    snapshot["surfaceId"] = SurfaceId;
    snapshot["root"] = _rootId;
    snapshot["components"] = componentList;
    // CloneRoot takes its copy under the model lock, so a concurrent
    // SetByPointer cannot leave a half-mutated tree in the clone. Redacting
    // that clone in place avoids the second DeepClone the public Redact does.
    snapshot["dataModel"] = SecretRedactor.RedactInPlace(_dataModel.CloneRoot(), _secretPaths);
    return snapshot;
}
|
||||
|
||||
/// <summary>
/// Asks <c>SecretRedactor</c> whether <paramref name="path"/> counts as secret
/// given the paths registered on this surface (e.g., an obscured TextField
/// bound there).
/// </summary>
internal bool IsSecretPath(string? path)
{
    return SecretRedactor.IsSecret(path, _secretPaths);
}
|
||||
|
||||
/// <summary>
/// Discards the current visual tree and all per-build state (subscriptions,
/// cycle set, secret paths, depth/count counters, media budget), then renders
/// again from the declared root, if there is one.
/// </summary>
private void Rebuild()
{
    DisposeSubscriptions();
    _root.Children.Clear();
    _renderingIds.Clear();
    _secretPaths.Clear();
    _renderDepth = 0;
    _renderCount = 0;
    _mediaBudget = new MediaLoadBudget();

    if (_rootId is { } rootId && BuildElement(rootId) is { } rendered)
    {
        _root.Children.Add(rendered);
    }
}
|
||||
|
||||
/// <summary>
/// Renders the component with the given id, recursing into children through
/// the render context's <c>BuildChild</c> callback.
/// </summary>
/// <returns>
/// The rendered element; an inline error placeholder when a cycle, the depth
/// cap, the component cap, or a renderer exception is hit; or <c>null</c> when
/// no definition exists for <paramref name="id"/>.
/// </returns>
private FrameworkElement? BuildElement(string id)
{
    if (!_defs.TryGetValue(id, out var def))
        return null;

    // Surface ids and component names come from the agent, so they are
    // sanitised before being written to the log, as the component id is.
    if (_renderingIds.Contains(id))
    {
        _logger.Warn($"[A2UI] cycle detected on surface '{LogSafe(SurfaceId)}' component '{LogSafe(id)}'; rendering placeholder");
        return BuildErrorPlaceholder(def.ComponentName, "cycle detected");
    }
    if (_renderDepth >= MaxRenderDepth)
    {
        _logger.Warn($"[A2UI] depth cap ({MaxRenderDepth}) on surface '{LogSafe(SurfaceId)}' at component '{LogSafe(id)}'");
        return BuildErrorPlaceholder(def.ComponentName, $"depth cap ({MaxRenderDepth})");
    }
    if (_renderCount >= MaxComponentsPerSurface)
    {
        _logger.Warn($"[A2UI] component cap ({MaxComponentsPerSurface}) on surface '{LogSafe(SurfaceId)}' at component '{LogSafe(id)}'");
        return BuildErrorPlaceholder(def.ComponentName, $"component cap ({MaxComponentsPerSurface})");
    }

    // The id stays in the in-progress set for the duration of this call, so a
    // descendant that refers back to it is reported as a cycle. _renderCount
    // is deliberately not decremented: it counts the whole build.
    _renderingIds.Add(id);
    _renderDepth++;
    _renderCount++;
    try
    {
        var renderer = _registry.GetOrUnknown(def.ComponentName);
        var ctx = new RenderContext
        {
            SurfaceId = SurfaceId,
            DataModel = _dataModel,
            Actions = _actions,
            Theme = _theme,
            BuildChild = BuildElement,
            Subscriptions = _subscriptions,
            SecretPaths = _secretPaths,
            Logger = _logger,
            MediaBudget = _mediaBudget,
        };

        try { return renderer.Render(def, ctx); }
        catch (Exception ex)
        {
            // A renderer failure must never take the surface down. Do not
            // reroute through the registry: if the fallback failed too, the
            // real component name would be lost. Show an inline placeholder
            // with the actual name and message instead.
            _logger.Warn($"[A2UI] renderer for '{LogSafe(def.ComponentName)}' threw: {ex.Message}");
            return BuildErrorPlaceholder(def.ComponentName, ex.Message);
        }
    }
    finally
    {
        _renderingIds.Remove(id);
        _renderDepth--;
    }
}
|
||||
|
||||
/// <summary>
/// Builds the inline element shown in place of a component that could not be
/// rendered: an orange-red bordered row with a warning icon followed by
/// "<paramref name="componentName"/>: <paramref name="message"/>".
/// </summary>
private static FrameworkElement BuildErrorPlaceholder(string componentName, string message)
{
    var stack = new StackPanel
    {
        Orientation = Orientation.Horizontal,
        Spacing = 8,
        Padding = new Thickness(8),
        BorderThickness = new Thickness(1),
        BorderBrush = new SolidColorBrush(Microsoft.UI.Colors.OrangeRed),
        CornerRadius = new CornerRadius(4),
    };
    stack.Children.Add(new FontIcon
    {
        // The glyph literal was empty, which renders no icon at all. It is
        // written as an escape so the private-use code point survives
        // encoding round-trips.
        // NOTE(review): U+E7BA is the "Warning" glyph; confirm it is the
        // icon that was originally intended.
        Glyph = "\uE7BA",
        FontFamily = new FontFamily("Segoe Fluent Icons"),
    });
    stack.Children.Add(new TextBlock
    {
        Text = $"{componentName}: {message}",
        VerticalAlignment = VerticalAlignment.Center,
        TextWrapping = TextWrapping.Wrap,
    });
    return stack;
}
|
||||
|
||||
/// <summary>
/// Makes an agent-supplied string safe to embed in a single log line: caps it
/// at 64 UTF-16 code units and replaces every control character (plus the
/// Unicode line/paragraph separators) with a space.
/// </summary>
/// <param name="s">Text to sanitise; may be null.</param>
/// <returns>The sanitised text, or an empty string for null/empty input. Never null.</returns>
private static string LogSafe(string? s)
{
    if (string.IsNullOrEmpty(s)) return string.Empty;

    const int maxLength = 64;
    int length = Math.Min(s.Length, maxLength);

    // Do not cut a surrogate pair in half; a lone high surrogate is not valid text.
    if (length < s.Length && char.IsHighSurrogate(s[length - 1])) length--;

    var buffer = new char[length];
    for (int i = 0; i < length; i++)
    {
        char c = s[i];
        // CR/LF/TAB are control characters too, so they are still covered.
        bool unsafeForLog = char.IsControl(c) || c == '\u2028' || c == '\u2029';
        buffer[i] = unsafeForLog ? ' ' : c;
    }
    return new string(buffer);
}
|
||||
|
||||
/// <summary>
/// Disposes every subscription held by this surface and empties the table.
/// A failure while disposing one subscription is ignored so the rest are
/// still released.
/// </summary>
private void DisposeSubscriptions()
{
    foreach (var entry in _subscriptions)
    {
        try
        {
            entry.Value.Dispose();
        }
        catch
        {
        }
    }

    _subscriptions.Clear();
}
|
||||
|
||||
/// <summary>
/// Publishes the theme's accent, foreground and font family into
/// <paramref name="element"/>'s resource dictionary. Does nothing for the
/// empty theme, and skips any value the theme does not define.
/// </summary>
private static void ApplyThemeToScope(FrameworkElement element, A2UITheme theme)
{
    if (theme == A2UITheme.Empty)
    {
        return;
    }

    var scope = element.Resources;

    if (theme.Accent is { } accentColor)
    {
        // Each key gets its own brush instance.
        scope["A2UIAccentBrush"] = new SolidColorBrush(accentColor);
        scope["AccentFillColorDefaultBrush"] = new SolidColorBrush(accentColor);
    }

    if (theme.Foreground is { } foregroundColor)
    {
        scope["A2UIForegroundBrush"] = new SolidColorBrush(foregroundColor);
    }

    if (theme.FontFamily is { } familyName && !string.IsNullOrWhiteSpace(familyName))
    {
        scope["A2UIFontFamily"] = new FontFamily(familyName);
    }
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user