diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c5e2e..e4846e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Added per-lease portal detail pages with bridge status, pasteable commands, recent run links, and a stop action. - Added `.crabboxignore` for repo-local sync-only exclude patterns shared by `run` and `sync-plan`. - Documented the prebaked runner image boundary: provider-owned AMIs/snapshots hold machine capabilities while repo/runtime caches stay in QA workflows or warm leases. +- Added a provider backend registry and authoring guide so delegated and SSH-backed providers can live in provider-owned packages while core keeps command parsing, rendering, and capability validation. ### Fixed @@ -33,6 +34,7 @@ - Fixed remote git seeding so an unfetchable local commit cannot leave an empty `.git` worktree that makes sync sanity report every tracked file as deleted. - Skipped remote git seeding for local commits that are not present in any remote-tracking ref, avoiding slow doomed clone/fetch attempts before rsync. - Fixed Windows archive sync from macOS so Apple extended attributes do not spam remote tar warnings. +- Fixed provider-owned flags and target/capability validation to run through registered provider specs while preserving script-facing list JSON compatibility for coordinator and Blacksmith backends. ## 0.5.0 - 2026-05-04 diff --git a/cmd/crabbox/main.go b/cmd/crabbox/main.go index 5c899d7..4df25a6 100644 --- a/cmd/crabbox/main.go +++ b/cmd/crabbox/main.go @@ -6,6 +6,7 @@ import ( "os" "github.com/openclaw/crabbox/internal/cli" + _ "github.com/openclaw/crabbox/internal/providers/all" ) func main() { diff --git a/docs/README.md b/docs/README.md index 6a54521..8b64c8f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -78,7 +78,7 @@ Pick whichever matches your intent: - **Get the mental model:** [How Crabbox Works](how-it-works.md), [Architecture](architecture.md), [Orchestrator](orchestrator.md). 
- **Use the CLI:** [CLI](cli.md), [Commands](commands/README.md), [Features](features/README.md), [Actions hydration](features/actions-hydration.md). -- **Pick a target:** [Providers](features/providers.md), [AWS](features/aws.md), [Hetzner](features/hetzner.md), [Blacksmith Testbox](features/blacksmith-testbox.md), [Interactive desktop and VNC](features/interactive-desktop-vnc.md). +- **Pick or add a target:** [Providers](features/providers.md), [Provider backends](provider-backends.md), [AWS](features/aws.md), [Hetzner](features/hetzner.md), [Blacksmith Testbox](features/blacksmith-testbox.md), [Interactive desktop and VNC](features/interactive-desktop-vnc.md). - **Operate it:** [Operations](operations.md), [Observability](observability.md), [Troubleshooting](troubleshooting.md), [Performance](performance.md). - **Set it up or audit it:** [Infrastructure](infrastructure.md), [Security](security.md), [Source Map](source-map.md), [MVP Plan](mvp-plan.md). diff --git a/docs/commands/list.md b/docs/commands/list.md index 6f583c6..e35a980 100644 --- a/docs/commands/list.md +++ b/docs/commands/list.md @@ -14,7 +14,10 @@ crabbox list --json In `provider=ssh` mode this prints the configured static target. -In `blacksmith-testbox` mode this forwards to `blacksmith testbox list`. Human output preserves the Blacksmith table; `--json` emits Crabbox-parsed rows with id, status, repo, workflow, job, ref, and created time when the upstream table exposes those columns. +In `blacksmith-testbox` mode this reads `blacksmith testbox list` and renders the +same Crabbox list shape as other providers. `--json` keeps the compatibility +shape parsed from the Blacksmith table: id, status, repo, workflow, job, ref, +and created time when the upstream table exposes those columns. 
Flags: diff --git a/docs/commands/status.md b/docs/commands/status.md index 26e84eb..f689a79 100644 --- a/docs/commands/status.md +++ b/docs/commands/status.md @@ -10,7 +10,12 @@ crabbox status --id blue-lobster --json crabbox status --provider ssh --target macos --static-host mac-studio.local ``` -`--id` accepts the canonical `cbx_...` ID or active slug. In `blacksmith-testbox` mode it accepts a `tbx_...` ID or local slug and forwards to `blacksmith testbox status`. In `provider=ssh` mode `--id` is optional and resolves the configured static target or local claim. Plain status is read-only; `--wait` touches the lease while waiting for Crabbox brokered leases. +`--id` accepts the canonical `cbx_...` ID or active slug. In +`blacksmith-testbox` mode it accepts a `tbx_...` ID or local slug and derives a +normalized Crabbox status view from `blacksmith testbox list --all`. In +`provider=ssh` mode `--id` is optional and resolves the configured static target +or local claim. Plain status is read-only; `--wait` touches the lease while +waiting for Crabbox brokered leases. Flags: diff --git a/docs/features/README.md b/docs/features/README.md index 92f6f6d..1e7426e 100644 --- a/docs/features/README.md +++ b/docs/features/README.md @@ -13,6 +13,7 @@ Core features: - [Coordinator](coordinator.md): brokered leases through Cloudflare Workers and Durable Objects. - [Broker auth and routing](broker-auth-routing.md): GitHub login, shared bearer tokens, optional Cloudflare Access, and Worker routes. - [Providers](providers.md): provider overview, target matrix, classes, and fallback. +- [Provider backends](../provider-backends.md): implementation guide for adding a new provider/backend/plugin. - [AWS](aws.md): EC2 Linux, Windows, WSL2, EC2 Mac, capacity, AMIs, and security groups. - [Hetzner](hetzner.md): Linux-only managed Hetzner behavior, classes, and cleanup. - [Blacksmith Testbox](blacksmith-testbox.md): delegated Testbox backend behavior. 
diff --git a/docs/features/blacksmith-testbox.md b/docs/features/blacksmith-testbox.md index 7c89ac4..4e6bfa4 100644 --- a/docs/features/blacksmith-testbox.md +++ b/docs/features/blacksmith-testbox.md @@ -75,20 +75,23 @@ For repos that already use Crabbox Actions hydration, `blacksmith.workflow`, `bl ## Forwarded Commands -Crabbox forwards machine operations to the Blacksmith CLI: +Crabbox forwards lifecycle and run operations to the Blacksmith CLI: ```sh blacksmith testbox warmup --job --ref --ssh-public-key --idle-timeout blacksmith testbox run --id --ssh-private-key -blacksmith testbox status --id blacksmith testbox list +blacksmith testbox list --all blacksmith testbox stop --id ``` -The wrapper is deliberately thin. If Blacksmith adds behavior to those commands, Crabbox should prefer forwarding rather than reimplementing it. +The wrapper is deliberately thin for warmup, run, and stop. `crabbox list` and +`crabbox status` normalize Blacksmith data into Crabbox's common list/status +views so rendering stays core-owned across providers. Status currently reads +`blacksmith testbox list --all` to build that view. `crabbox list --provider blacksmith-testbox --json` parses the Blacksmith table -output into JSON rows with the fields Crabbox can see. That parser is a +output into compatibility JSON rows with the fields Crabbox can see. That parser is a compatibility layer, not a Blacksmith API contract. If the Blacksmith CLI adds native JSON output, Crabbox should switch to that and drop table parsing. diff --git a/docs/features/providers.md b/docs/features/providers.md index fcc5a3c..82bddf5 100644 --- a/docs/features/providers.md +++ b/docs/features/providers.md @@ -26,6 +26,7 @@ ssh Existing SSH host selected by static.host - [AWS](aws.md): EC2 Linux, Windows, WSL2, EC2 Mac, capacity, AMIs, and security groups. - [Hetzner](hetzner.md): Linux-only managed provider behavior, classes, and cleanup. 
- [Blacksmith Testbox](blacksmith-testbox.md): delegated Testbox backend behavior. +- [Provider backends](../provider-backends.md): implementation guide for adding a new provider/backend/plugin. ## Hetzner Summary diff --git a/docs/provider-backends.md b/docs/provider-backends.md new file mode 100644 index 0000000..45854d4 --- /dev/null +++ b/docs/provider-backends.md @@ -0,0 +1,591 @@ +# Provider Backends + +Read when: + +- adding a new Crabbox provider; +- deciding between an SSH lease backend and a delegated run backend; +- adding provider-specific flags or config; +- reviewing a provider PR for the right ownership boundary; +- designing a future external provider plugin protocol. + +Crabbox providers are built around one rule: + +Providers configure backends. Core commands own workflows. + +That keeps `crabbox run`, `warmup`, `list`, `status`, `stop`, `cleanup`, +Actions hydration, sync, result collection, rendering, and timing consistent +across providers. A provider should describe what it can do and return a backend +object. It should not fork the command surface. + +## Choose The Backend Shape + +Start by choosing the execution model. + +### SSH Lease Backend + +Use `SSHLeaseBackend` when the provider can hand Crabbox an SSH target. + +Examples: + +- Hetzner Cloud +- AWS EC2 +- static SSH hosts +- a future Daytona sandbox if it exposes stable SSH access + +Crabbox core owns the normal workflow after acquisition: + +- claim and slug handling; +- SSH readiness checks; +- network target resolution; +- sync and sync guardrails; +- command wrapping and streaming; +- JUnit/result collection; +- Actions runner hydration over SSH; +- heartbeat/touch; +- release. 
+ +The backend owns only provider lifecycle: + +```go +type SSHLeaseBackend interface { + Backend + + Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) + Resolve(ctx context.Context, req ResolveRequest) (LeaseTarget, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error + Touch(ctx context.Context, req TouchRequest) (Server, error) +} +``` + +Implement this when `LeaseTarget.SSH` can be populated with host, port, user, +key, work root, target OS, and Windows mode. + +### Delegated Run Backend + +Use `DelegatedRunBackend` when the provider owns execution instead of exposing +Crabbox-managed SSH. + +Examples: + +- Blacksmith Testbox +- a future Islo backend if it owns workspace setup and command streaming +- a future external runner service that accepts a command and streams output + +The delegated backend owns warmup, command execution, output streaming, and +stop. Crabbox core still owns provider selection, config loading, local claims, +friendly slugs, timing summaries, and normalized list/status rendering. + +```go +type DelegatedRunBackend interface { + Backend + + Warmup(ctx context.Context, req WarmupRequest) error + Run(ctx context.Context, req RunRequest) (RunResult, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + Status(ctx context.Context, req StatusRequest) (statusView, error) + Stop(ctx context.Context, req StopRequest) error +} +``` + +The current implementation still returns the unexported `statusView`. That means +a delegated backend implementation cannot live entirely outside `internal/cli` +yet. Keep delegated backend implementations in `internal/cli`, or expose a +narrow constructor from `internal/cli` and let the provider package own only +registration/spec/flags/configure. Exporting `StatusView` is the next cleanup +before delegated backends can move fully into `internal/providers/`. 
+ +A delegated backend must reject sync-only options that Crabbox cannot honor: + +```go +if err := rejectDelegatedSyncOptions(providerName, req); err != nil { + return RunResult{}, err +} +``` + +`rejectDelegatedSyncOptions` is currently an `internal/cli` helper. Delegated +backends outside `internal/cli` need an exported equivalent before they can use +this directly. + +Do not pretend a delegated provider is SSH-like unless the provider has a stable +SSH contract. If Crabbox cannot run rsync and remote commands itself, use +`DelegatedRunBackend`. + +### Optional Interfaces + +Add optional capabilities as small interfaces instead of widening every backend. + +Cleanup is already optional: + +```go +type CleanupBackend interface { + Backend + + Cleanup(ctx context.Context, req CleanupRequest) error +} +``` + +List JSON compatibility is optional: + +```go +type JSONListBackend interface { + Backend + + ListJSON(ctx context.Context, req ListRequest) (any, error) +} +``` + +`JSONListBackend` is a compatibility escape hatch for script-facing JSON shapes. +Use it only when an existing provider already exposed a different JSON schema +than the normalized `[]LeaseView` shape. + +Future provider-specific capability areas should follow the same pattern, for +example pricing or image management. + +## Package Layout + +Built-in providers live under `internal/providers/`: + +```text +internal/providers/all +internal/providers/hetzner +internal/providers/aws +internal/providers/ssh +internal/providers/blacksmith +``` + +Each provider package owns registration, provider name, aliases, spec, +provider-specific flags, and backend configuration. 
`cmd/crabbox` imports +`internal/providers/all` for side-effect registration: + +```go +import ( + "github.com/openclaw/crabbox/internal/cli" + _ "github.com/openclaw/crabbox/internal/providers/all" +) +``` + +The core provider contract and current backend implementations live in +`internal/cli`: + +```text +internal/cli/provider_backend.go # interfaces, registry, request/result types +internal/cli/providers_common.go # shared direct SSH backend helpers +internal/cli/provider_aws.go # AWS SSH lease backend implementation +internal/cli/provider_hetzner.go # Hetzner SSH lease backend implementation +internal/cli/provider_static.go # static SSH lease backend implementation +internal/cli/provider_coordinator.go # brokered coordinator lease backend +internal/cli/provider_blacksmith.go # existing delegated Blacksmith backend +``` + +This split is intentional. Existing built-ins still use a broad set of +unexported lifecycle helpers for SSH keys, labels, slugs, claims, coordinator +heartbeats, sync, timing, and release. Provider packages should depend only on +the exported contract. Move backend implementation code into +`internal/providers/` only when the required helper surface is small and +intentionally exported. + +New providers should start in their own provider folder. If an SSH backend can +be implemented against the exported contract, keep it there. If it needs +temporary core helpers, expose a narrow constructor or helper from +`internal/cli` rather than exporting a large grab bag. Delegated backends cannot +move fully out of `internal/cli` until `statusView` and delegated sync-option +validation are exported. 
+ +## Provider Registration + +A provider implements `cli.Provider`: + +```go +type Provider interface { + Name() string + Aliases() []string + Spec() ProviderSpec + + RegisterFlags(fs *flag.FlagSet, defaults Config) any + ApplyFlags(cfg *Config, fs *flag.FlagSet, values any) error + + Configure(cfg Config, rt Runtime) (Backend, error) +} +``` + +Minimal SSH provider package: + +```go +package example + +import ( + "flag" + + "github.com/openclaw/crabbox/internal/cli" +) + +func init() { + cli.RegisterProvider(Provider{}) +} + +type Provider struct{} + +func (Provider) Name() string { return "example" } +func (Provider) Aliases() []string { return nil } + +func (Provider) Spec() cli.ProviderSpec { + return cli.ProviderSpec{ + Name: "example", + Kind: cli.ProviderKindSSHLease, + Targets: []cli.TargetSpec{ + {OS: "linux"}, + }, + Features: cli.FeatureSet{ + cli.FeatureSSH, + cli.FeatureCrabboxSync, + }, + Coordinator: cli.CoordinatorNever, + } +} + +func (Provider) RegisterFlags(*flag.FlagSet, cli.Config) any { + return cli.NoProviderFlags() +} + +func (Provider) ApplyFlags(*cli.Config, *flag.FlagSet, any) error { + return nil +} + +func (p Provider) Configure(cfg cli.Config, rt cli.Runtime) (cli.Backend, error) { + return cli.NewExampleLeaseBackend(p.Spec(), cfg, rt), nil +} +``` + +`NewExampleLeaseBackend` stands in for the backend constructor you add for the +provider. Existing providers use constructors such as `NewAWSLeaseBackend` and +`NewBlacksmithBackend`. + +Then add the provider to `internal/providers/all/all.go`: + +```go +import _ "github.com/openclaw/crabbox/internal/providers/example" +``` + +Tests in `internal/cli` do not import `internal/providers/all`, because that +would create an import cycle. Register test providers from a same-package test +file when testing core dispatch. 
+ +## Provider Spec + +`ProviderSpec` is command-facing metadata: + +```go +type ProviderSpec struct { + Name string + Kind ProviderKind + Targets []TargetSpec + Features FeatureSet + Coordinator CoordinatorMode +} +``` + +Use canonical provider names in docs and config. Aliases are for compatibility. + +Pick `Kind` carefully: + +- `ProviderKindSSHLease`: provider returns SSH targets and Crabbox owns sync/run. +- `ProviderKindDelegatedRun`: provider owns execution and output streaming. + +Targets should describe what the provider can actually satisfy. Do not list +`windows`, `macos`, `desktop`, `browser`, or `code` unless the backend supports +that path end to end. + +Feature flags should be concrete: + +```go +cli.FeatureSSH +cli.FeatureCrabboxSync +cli.FeatureCleanup +cli.FeatureDesktop +cli.FeatureBrowser +cli.FeatureCode +cli.FeatureTailscale +``` + +Actions runner hydration is intentionally not a provider feature. It is a core +SSH-over-Linux workflow. It requires: + +- an SSH lease backend; +- `target=linux`; +- no delegated execution. + +Only set `CoordinatorSupported` when the Crabbox coordinator can provision that +provider. A direct-only SSH provider should use `CoordinatorNever`. + +## Flags And Config + +Provider flags are registered before parsing because Go's `flag` package rejects +unknown flags. `RegisterFlags` must be cheap and side-effect free. It returns an +opaque values struct that is passed back into `ApplyFlags` only after config and +common flags select the provider. 
+ +Pattern, when the provider has an exported flag helper or lives in `internal/cli`: + +```go +type exampleFlagValues struct { + Region *string +} + +func (Provider) RegisterFlags(fs *flag.FlagSet, defaults cli.Config) any { + return exampleFlagValues{ + Region: fs.String("example-region", defaults.Example.Region, "Example region"), + } +} + +func (Provider) ApplyFlags(cfg *cli.Config, fs *flag.FlagSet, values any) error { + v, ok := values.(exampleFlagValues) + if !ok { + return nil + } + if cli.FlagWasSet(fs, "example-region") { + cfg.Example.Region = *v.Region + } + return nil +} +``` + +`Config` does not yet have a generic provider config bag. New provider packages +should either: + +- add typed config fields and use `cli.FlagWasSet` from the provider package; or +- expose a small provider-specific flag helper from `internal/cli`, as + Blacksmith does, when the config type is not ready to export cleanly. + +If a provider needs durable config, add typed config fields in `Config` and env +overrides in `config.go`. Keep compatibility shims for existing top-level +provider config, but prefer `providers.<name>` for new provider families once +that config bag lands. + +Never pass provider secrets as command-line arguments. Use environment variables, +local SDK config, the coordinator, or a credential store outside repo config. + +## Runtime + +Backends receive a narrow runtime: + +```go +type Runtime struct { + Stdout io.Writer + Stderr io.Writer + Clock Clock + HTTP *http.Client + Exec CommandRunner +} +``` + +Use it instead of `App`, global clocks, or package-level command hooks. + +Delegated CLI integrations must use `Runtime.Exec`: + +```go +result, err := rt.Exec.Run(ctx, cli.LocalCommandRequest{ + Name: "provider-cli", + Args: args, + Stdout: rt.Stdout, + Stderr: rt.Stderr, +}) +``` + +This gives tests a fake command runner and avoids package-level +`exec.CommandContext` seams. + +Use `Runtime.Clock` for timing in backend code. 
Use `Runtime.Stdout` and +`Runtime.Stderr` for streaming and warnings. + +## Implementing An SSH Lease Backend + +An SSH lease backend should return a complete `LeaseTarget`: + +```go +type LeaseTarget struct { + Server Server + SSH SSHTarget + LeaseID string + Coordinator *CoordinatorClient +} +``` + +`Acquire` should: + +1. validate direct-provider prerequisites; +2. mint or accept the lease id handled by the request path; +3. ensure or install the SSH key; +4. provision the machine or sandbox; +5. wait until an address exists; +6. populate `SSHTarget`; +7. wait for SSH readiness when the provider owns boot; +8. mark provider labels/tags as ready; +9. return `LeaseTarget`. + +`Resolve` should accept canonical lease IDs, provider IDs, names, and slugs +where the provider can support them. It should return the stored per-lease SSH +key when available. + +`List` returns normalized `LeaseView` values. Do not print from `List`; command +rendering belongs to core. + +`Touch` should update provider labels/tags with idle and state metadata when the +provider supports it. Static providers can update only the in-memory view. + +`ReleaseLease` should be idempotent where practical. Remove local claims after +the provider release succeeds or is known to be unnecessary. + +If cleanup is meaningful, implement `CleanupBackend`. Cleanup should honor +`DryRun`, log skip/delete decisions to stderr, and use provider labels to avoid +deleting unrelated machines. + +## Implementing A Delegated Run Backend + +A delegated backend should preserve Crabbox ergonomics while letting the provider +own the remote workflow. + +`Warmup` should: + +1. validate provider-specific workflow config; +2. create or warm the provider resource; +3. claim the resource locally with provider name and slug; +4. print the standard warmup summary; +5. write timing JSON when requested. + +`Run` should: + +1. reject unsupported Crabbox sync options; +2. acquire a resource or resolve an existing id/slug; +3. 
claim/reclaim the resource for the repo; +4. stream provider output through `Runtime.Stdout` and `Runtime.Stderr`; +5. return `RunResult`; +6. stop temporary resources when `Keep` is false. + +`List` and `Status` should return normalized views. If the provider only offers +a table or lossy native status shape, keep that parsing inside the backend. + +`Stop` should stop the provider resource, remove local claims, and remove local +per-resource keys if the backend created them. + +Do not make delegated providers support `crabbox ssh`, `vnc`, `webvnc`, +`screenshot`, `code`, or Actions runner hydration unless the provider exposes a +stable connection contract that preserves Crabbox's security boundary. + +## Rendering + +Backends return values. Core renders output. + +`ListRequest` and `StatusRequest` intentionally do not carry JSON flags. The +command handler decides whether to render human output or JSON. + +`JSONListBackend` is the exception for compatibility with older script-facing +JSON schemas. It should not be used for new providers. + +That rule keeps: + +- `crabbox list --json`; +- `crabbox status --json`; +- human tables; +- future UI/plugin consumers; + +consistent across backend kinds. + +## External Provider Plugins + +External process plugins are not implemented yet. Do not add a provider that +depends on an undocumented stdio protocol. + +The intended direction is: + +- a built-in Go provider package discovers/configures the external process; +- the process speaks JSON over stdio; +- the Go side adapts it to `SSHLeaseBackend` or `DelegatedRunBackend`; +- core commands still render list/status and own SSH workflows where applicable. 
+ +Expected rough command shape: + +```text +provider-plugin capabilities +provider-plugin acquire +provider-plugin resolve +provider-plugin list +provider-plugin release +provider-plugin touch +provider-plugin run +provider-plugin status +provider-plugin stop +``` + +The external protocol should not bypass the backend interfaces. It is an +implementation detail behind a normal registered provider. + +## Tests + +Add tests at the lowest level that proves the contract. + +For provider registration: + +- canonical name resolves through `ProviderFor`; +- aliases resolve where promised; +- `Spec` has the expected kind, targets, features, and coordinator mode; +- provider-specific flags apply only after selection. + +For SSH lease backends: + +- acquire success returns a `LeaseTarget` with host, user, port, key, lease id; +- acquire failure releases partial resources when possible; +- resolve supports lease id and supported aliases; +- list returns normalized views without printing; +- touch updates labels/tags and honors state/idle timeout; +- release removes claims and provider resources; +- cleanup honors dry-run. + +For delegated run backends: + +- sync-only/checksum/force-large options are rejected; +- new run acquires, claims, streams, and stops when `Keep=false`; +- existing id/slug resolves and claims correctly; +- list/status parse provider output into normalized views; +- stop removes claims and local keys; +- all subprocess calls go through `Runtime.Exec`. + +Use fake `CommandRunner`, fake clocks, fake HTTP clients, and provider test +clients. Avoid live provider calls in unit tests. + +Run at least: + +```sh +go test -count=1 ./internal/cli ./internal/providers/... +go test -count=1 ./... +go vet ./... +npm run docs:check +``` + +For high-risk provider changes, also run: + +```sh +go test -race -count=1 ./internal/cli +go build -trimpath -o bin/crabbox ./cmd/crabbox +``` + +Add live smoke only when credentials and cost boundaries are explicit. 
+ +## Review Checklist + +Before landing a new backend: + +- The provider has a folder under `internal/providers/`. +- The provider is imported by `internal/providers/all`. +- `Name` is canonical and docs use that name. +- Compatibility aliases are intentional and tested. +- `ProviderSpec.Kind` matches the real execution model. +- Targets and features describe implemented behavior only. +- Coordinator mode is `CoordinatorNever` unless the coordinator can provision it. +- Provider flags are registered before parse and applied only after selection. +- Secrets are not stored in repo config or passed in argv. +- `list` and `status` return normalized values instead of printing. +- Delegated providers reject unsupported sync options. +- SSH providers do not own core sync/run/rendering. +- Tests cover command dispatch and backend behavior without live credentials. +- Docs and source map are updated. diff --git a/docs/refactor/provider.md b/docs/refactor/provider.md new file mode 100644 index 0000000..11fb183 --- /dev/null +++ b/docs/refactor/provider.md @@ -0,0 +1,1422 @@ +# Provider Backend Refactor + +Read when: + +- refactoring provider dispatch, direct lifecycle, or delegated run behavior; +- rebasing the Daytona or Islo provider pull requests; +- adding a new provider backend; +- changing provider config, provider flags, coordinator routing, list/status/stop, + cleanup, or capability validation. + +For step-by-step implementation guidance, read +[Provider Backends](../provider-backends.md). This document captures design +context and migration notes; the authoring guide is the handrail for new code. + +## Context + +Crabbox has two real execution models. + +The first model is SSH lease execution. Hetzner, AWS, static SSH, and Daytona +produce a machine or sandbox reachable through SSH. Crabbox owns the workflow: +claim, sync, command wrapping, stdout/stderr streaming, result collection, +timing, heartbeat, and release. + +The second model is delegated execution. 
Blacksmith Testboxes and Islo own +machine setup, file/workspace state, command execution, and output streaming. +Crabbox keeps provider selection, config, local claims/slugs, and timing +summaries, but it does not rsync into these providers. + +Relevant pull requests: + +- Daytona provider: https://github.com/openclaw/crabbox/pull/32 +- Islo SDK provider: https://github.com/openclaw/crabbox/pull/24 +- older Islo CLI provider: https://github.com/openclaw/crabbox/pull/16 + +SDK/source checks: + +- Daytona upstream has an official Go SDK at + `github.com/daytonaio/daytona/libs/sdk-go`, plus a lower-level generated API + client. The official SDK is large and brings more dependency surface than the + provider needs. The generated API client exposes the exact REST calls Crabbox + needs: create sandbox, create SSH access, list sandboxes, update labels, and + update last activity. Prefer a tiny Crabbox-owned REST client unless the SDK + becomes meaningfully simpler. +- Daytona snapshot creation does not accept CPU/memory/disk resources. Resource + fields live on image creation. Snapshot-only mode must not expose resource + flags that become no-ops. +- Daytona JWT auth uses an organization header in the generated client. Require + `DAYTONA_ORGANIZATION_ID` for JWT auth unless upstream docs prove the selected + account flow does not need it. +- Islo's Go SDK is young, low-adoption, generated, and has no tagged versions in + the checked source. It is acceptable behind a narrow Crabbox-owned adapter only + if the provider is accepted at all. +- Islo's SDK execution stream does not expose a clean typed streaming iterator + today. Keep the custom SSE consumer from the PR until upstream provides a + usable stream API. +- https://github.com/openclaw/crabbox/pull/24 superseded + https://github.com/openclaw/crabbox/pull/16 but was closed for product-fit and + scope concerns. Rebase it only as a delegated backend, not as an SSH-like + provider. 
+ +The current implementation has provider checks spread through command handlers +and helper paths. More `isDaytonaProvider` and `isIsloProvider` branches would +work short term, but every new provider would touch `run`, `warmup`, `list`, +`status`, `stop`, `cleanup`, config, capability validation, and docs. + +The refactor should make providers supply small backends while Crabbox core owns +the workflows. + +## Design Principle + +Providers do not own commands. Providers configure backends. Core commands own +workflow orchestration. + +The command flow should look like this: + +```go +backend, err := loadBackend(cfg, runtime) +if err != nil { + return err +} + +switch b := backend.(type) { +case DelegatedRunBackend: + return b.Run(ctx, runReq) + +case SSHLeaseBackend: + lease, acquired, err := acquireOrResolve(ctx, b, runReq) + if err != nil { + return err + } + return runOverSSHLease(ctx, b, lease, runReq, acquired) + +default: + return exit(2, "provider=%s does not support run", backend.Spec().Name) +} +``` + +Provider implementations should not receive `App`. They receive a narrow +runtime and typed request structs. + +## Goals + +- Keep all current providers working. +- Rebase Daytona as an SSH lease backend. +- Rebase Islo as a delegated run backend. +- Keep Hetzner/AWS broker behavior intact when a coordinator is configured. +- Make coordinator routing a wrapper around SSH lease backends, not provider + branching inside each command. +- Register built-in provider flags before parsing so provider-specific flags do + not fail before provider selection. +- Keep built-in providers compiled into the Go binary. +- Avoid Go dynamic plugins. +- Leave an external process plugin protocol as a later extension point. +- Keep provider credentials out of repo config and command arguments. 
+ +## Current Implementation State + +The first landing implements the provider seam for the existing services: + +- `warmup`, `run`, `list`, `status`, `stop`, `cleanup`, lease resolution, and + best-effort touch now load a backend instead of branching on provider names. +- Built-in providers live under `internal/providers/` and are imported by + `cmd/crabbox` through `internal/providers/all`. +- Hetzner, AWS, static SSH, and the coordinator wrapper implement + `SSHLeaseBackend`. +- Blacksmith implements `DelegatedRunBackend` and uses injected + `CommandRunner` instead of package-level `exec.Command`. +- Command rendering for `list` and `status` is core-owned for both backend + kinds. +- `App` no longer owns direct Hetzner/AWS/static acquire or resolve helpers. + +## Non-Goals + +- No runtime-loaded Go `.so` plugins. +- No provider marketplace in this refactor. +- No coordinator support for Daytona or Islo in the first pass. +- No generic remote filesystem abstraction. +- No attempt to make Islo look like SSH unless Islo later ships a stable SSH + contract. +- No VNC, screenshot, desktop, browser, code portal, or Actions runner support + for Daytona/Islo unless a provider backend explicitly implements those + features later. + +## Provider And Backend Interfaces + +`Provider` is the registration and configuration layer: + +```go +type Provider interface { + Name() string + Aliases() []string + Spec() ProviderSpec + + RegisterFlags(fs *flag.FlagSet, defaults Config) any + ApplyFlags(cfg *Config, fs *flag.FlagSet, values any) error + + Configure(cfg Config, rt Runtime) (Backend, error) +} +``` + +`Backend` is the configured runtime object: + +```go +type Backend interface { + Spec() ProviderSpec +} +``` + +Only two backend shapes are needed initially. 
+ +### SSH Lease Backend + +```go +type SSHLeaseBackend interface { + Backend + + Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) + Resolve(ctx context.Context, req ResolveRequest) (LeaseTarget, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error + Touch(ctx context.Context, req TouchRequest) (Server, error) +} +``` + +This is for providers that can hand Crabbox an SSH target. Core owns sync and +command execution after acquisition. + +### Delegated Run Backend + +```go +type DelegatedRunBackend interface { + Backend + + Warmup(ctx context.Context, req WarmupRequest) error + Run(ctx context.Context, req RunRequest) (RunResult, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + Status(ctx context.Context, req StatusRequest) (statusView, error) + Stop(ctx context.Context, req StopRequest) error +} +``` + +This is for providers that own execution. Core does not call SSH, rsync, or +remote command wrapping for these providers. Delegated providers may stream +stdout/stderr during `Run`, but they should not own normal `list` or `status` +rendering when a normalized value can describe the result. If a provider has a +lossy or native-only status shape, keep that loss inside its backend and return +the closest status view instead of printing directly from command code. The +current implementation still uses unexported `statusView`; exporting +`StatusView` is a follow-up before delegated backend implementations can move +fully out of `internal/cli`. 
+ +### Optional Backend Interfaces + +Cleanup should be optional: + +```go +type CleanupBackend interface { + Backend + + Cleanup(ctx context.Context, req CleanupRequest) error +} +``` + +Provider pricing can be added later as another optional interface: + +```go +type PricingBackend interface { + Backend + + Price(ctx context.Context, req PriceRequest) (HourlyPrice, error) +} +``` + +## Runtime + +Backends should receive a narrow runtime instead of `App`: + +```go +type Runtime struct { + Stdout io.Writer + Stderr io.Writer + Clock Clock + HTTP *http.Client + Exec CommandRunner +} + +type CommandRunner interface { + Run(ctx context.Context, req LocalCommandRequest) (LocalCommandResult, error) +} + +type LocalCommandRequest struct { + Name string + Args []string + Env []string + Dir string + Stdout io.Writer + Stderr io.Writer +} + +type LocalCommandResult struct { + ExitCode int + Stdout string + Stderr string +} +``` + +Provider modules should not reach into command state, global command handlers, +or `App` methods. If they need a helper, move that helper into a small shared +package or pass it through a request/runtime field. + +Tests can then inject writers, clocks, fake HTTP clients, and fake backends +without constructing a full CLI app. `CommandRunner` is the seam for delegated +CLI providers such as Blacksmith so tests do not depend on package-level +`exec.Command` hooks. + +## Provider Spec + +Provider capabilities should be declarative and typed, not a growing list of +provider-name checks. 
+ +```go +type ProviderSpec struct { + Name string + Kind ProviderKind + Targets []TargetSpec + Features FeatureSet + Coordinator CoordinatorMode +} + +type ProviderKind string + +const ( + ProviderKindSSHLease ProviderKind = "ssh-lease" + ProviderKindDelegatedRun ProviderKind = "delegated-run" +) + +type CoordinatorMode string + +const ( + CoordinatorNever CoordinatorMode = "never" + CoordinatorSupported CoordinatorMode = "supported" +) + +type TargetSpec struct { + OS string + WindowsMode string +} + +type Feature string + +const ( + FeatureSSH Feature = "ssh" + FeatureCrabboxSync Feature = "crabbox-sync" + FeatureCleanup Feature = "cleanup" + FeatureDesktop Feature = "desktop" + FeatureBrowser Feature = "browser" + FeatureCode Feature = "code" + FeatureTailscale Feature = "tailscale" +) +``` + +Do not model Actions runner hydration as an AWS provider feature. That workflow +is core-over-SSH after a Linux lease exists. Validate `--actions-runner` as +"requires `SSHLeaseBackend`, target Linux, and not delegated" unless the provider +later owns a distinct hosted-runner product. 
+ +Initial provider matrix: + +```text +provider kind coordinator features +hetzner ssh-lease supported ssh, crabbox-sync, cleanup, tailscale +aws ssh-lease supported ssh, crabbox-sync, cleanup, desktop, browser, code +ssh ssh-lease never ssh, crabbox-sync, desktop, browser, code +daytona ssh-lease never ssh, crabbox-sync, cleanup +blacksmith-testbox delegated-run never delegated execution +islo delegated-run never delegated execution +``` + +Initial target matrix: + +```text +hetzner linux +aws linux, windows/normal, windows/wsl2, macos +ssh linux, windows/normal, windows/wsl2, macos +daytona linux +blacksmith-testbox provider-owned linux +islo provider-owned linux +``` + +Capability errors should come from `ProviderSpec` plus provider-specific +validation: + +```text +provider=daytona managed provisioning supports target=linux only +desktop/VNC is not supported for provider=islo; islo sandboxes are headless +--actions-runner is not supported for provider=daytona +``` + +## Registry + +Built-in providers register at init time: + +```go +var providerRegistry = map[string]Provider{} + +func RegisterProvider(provider Provider) { + names := append([]string{provider.Name()}, provider.Aliases()...) + for _, name := range names { + key := normalizeProviderName(name) + if key == "" { + panic("provider name is empty") + } + if providerRegistry[key] != nil { + panic("provider already registered: " + key) + } + providerRegistry[key] = provider + } +} + +func ProviderFor(name string) (Provider, error) { + provider := providerRegistry[normalizeProviderName(name)] + if provider == nil { + return nil, exit(2, "unknown provider %q", name) + } + return provider, nil +} +``` + +Canonical provider names: + +```text +hetzner +aws +ssh +blacksmith-testbox +daytona +islo +``` + +Compatibility aliases: + +```text +static -> ssh +static-ssh -> ssh +blacksmith -> blacksmith-testbox +``` + +Docs should use canonical names. 
+ +## On-Disk Layout + +Use one folder per provider for registration, provider-specific flags, provider +specs, and backend configuration: + +```text +internal/providers/all # imports every built-in provider +internal/providers/hetzner # Hetzner provider registration/spec +internal/providers/aws # AWS provider registration/spec +internal/providers/ssh # static SSH provider registration/spec +internal/providers/blacksmith # Blacksmith provider registration/spec +internal/cli/provider_backend.go # core interfaces, registry, requests +internal/cli/providers_common.go # shared direct SSH backend helpers +internal/cli/provider_aws.go # AWS SSH lease backend implementation +internal/cli/provider_hetzner.go # Hetzner SSH lease backend implementation +internal/cli/provider_static.go # static SSH lease backend implementation +internal/cli/provider_coordinator.go # brokered coordinator lease backend +internal/cli/provider_blacksmith.go # delegated Blacksmith backend implementation +internal/cli/hcloud.go # Hetzner API client +internal/cli/aws.go # AWS API client +internal/cli/static.go # static SSH target mapping and flags +internal/cli/blacksmith.go # Blacksmith args/parsing helpers +``` + +The first split keeps backend implementations in `internal/cli` because the +existing providers still use broad unexported lifecycle helpers for SSH keys, +claims, labels, slugs, coordinator heartbeats, sync, release, and timing. The +exported contract between provider folders and CLI is deliberately small: +`Provider`, `ProviderSpec`, request/result types, `Runtime`, and one backend +constructor per built-in provider. + +Move each backend implementation deeper into `internal/providers/<name>/` only +as the required helper surface becomes intentionally exported. New providers +such as Daytona and Islo should start in their own provider folder and avoid +depending on CLI internals that are not part of that exported contract. 
+ +## Flag Parsing + +Go's `flag` package rejects unknown flags during parse. This means +provider-specific flags must be registered before `flag.Parse`, even though the +selected provider is only known after config and flags are merged. + +Use this first-pass strategy for built-in providers: + +1. register common command flags; +2. iterate over all registered built-in providers and call `RegisterFlags`; +3. parse once; +4. load config; +5. apply common flags; +6. select `ProviderFor(cfg.Provider)`; +7. apply only the selected provider's parsed flag values; +8. configure the backend. + +Example: + +```go +providerFlagValues := RegisterAllProviderFlags(fs, defaults) +if err := parseFlags(fs, args); err != nil { + return err +} + +cfg, err := loadConfig() +if err != nil { + return err +} +applyCommonFlags(&cfg, fs, commonValues) + +provider, err := ProviderFor(cfg.Provider) +if err != nil { + return err +} +if err := ApplySelectedProviderFlags(provider, &cfg, fs, providerFlagValues); err != nil { + return err +} + +backend, err := provider.Configure(cfg, runtime) +``` + +Flags for non-selected providers are parsed but ignored. + +Provider `ApplyFlags` methods must only mutate config for flags that were +actually present in argv, using `flagWasSet` or equivalent. The values passed to +`RegisterFlags` exist so the parser and help text know the flag shape; they must +not overwrite repo config just because every built-in provider flag was +registered up front. + +A two-pass parser should only be introduced if external process providers need +to define flags dynamically. In that future design, pass one parses only safe +global selectors such as `--provider` and `--config`, loads provider metadata, +registers provider flags, and pass two parses the original args. 
+ +Provider-specific flags: + +```text +--blacksmith-org +--blacksmith-workflow +--blacksmith-job +--blacksmith-ref + +--daytona-snapshot +--daytona-target +--daytona-user +--daytona-work-root +--daytona-ssh-token-minutes + +--islo-image +--islo-workdir +--islo-gateway-profile +``` + +Avoid exposing provider flags that cannot work. For Daytona, do not expose +CPU/memory/disk overrides while the integration is snapshot-only and Daytona +rejects resource fields with snapshots. Either implement image mode fully or +hide resource overrides. + +## Backend Loading + +All commands should use the same loading shape: + +```go +func loadBackend(cfg Config, rt Runtime) (Backend, error) { + provider, err := ProviderFor(cfg.Provider) + if err != nil { + return nil, err + } + backend, err := provider.Configure(cfg, rt) + if err != nil { + return nil, err + } + if ssh, ok := backend.(SSHLeaseBackend); ok && shouldUseCoordinator(cfg, provider.Spec()) { + coord, err := newCoordinatorClientForBackend(cfg) + if err != nil { + return nil, err + } + return NewCoordinatorLeaseBackend(coord, ssh, rt), nil + } + return backend, nil +} +``` + +`Configure` builds direct provider clients and validates provider auth early. +Provider flag registration and `ApplyFlags` happen before this function, during +normal config assembly. `loadBackend` should not know about `flag.FlagSet` or +raw argv. +Examples: + +- Hetzner reads `HCLOUD_TOKEN` / `HETZNER_TOKEN`. +- AWS loads AWS SDK config. +- Daytona reads `DAYTONA_API_KEY` or `DAYTONA_JWT_TOKEN`. +- Islo validates `ISLO_API_KEY` before SDK use. +- Blacksmith verifies enough local config to build CLI args. + +## Coordinator Wrapper + +Coordinator routing should be a wrapper around `SSHLeaseBackend`, not a special +provider path inside every command. 
+ +```go +func shouldUseCoordinator(cfg Config, spec ProviderSpec) bool { + if spec.Coordinator != CoordinatorSupported { + return false + } + return cfg.Coordinator != "" +} +``` + +Wrapper shape: + +```go +type CoordinatorLeaseBackend struct { + Coord *CoordinatorClient + Direct SSHLeaseBackend + RT Runtime +} + +func NewCoordinatorLeaseBackend(coord *CoordinatorClient, direct SSHLeaseBackend, rt Runtime) SSHLeaseBackend { + return CoordinatorLeaseBackend{Coord: coord, Direct: direct, RT: rt} +} +``` + +The wrapper implements `SSHLeaseBackend`: + +- `Acquire` calls the coordinator lease API and maps `CoordinatorLease` to + `LeaseTarget`; +- `Resolve` calls coordinator get/slug lookup and maps to `LeaseTarget`; +- `ReleaseLease` calls coordinator release; +- `Touch` calls heartbeat or idle update paths as appropriate; +- `List` can call coordinator pool/admin routes when available. + +In brokered mode, the wrapper owns key creation, coordinator lease creation, +lease lookup, heartbeat, run recorder attachment, and lease release. It must not +fall through to direct Hetzner/AWS acquire, resolve, touch, release, list, or +cleanup calls after the coordinator is selected. The wrapped direct backend +exists only to carry the provider spec and direct-mode implementation for the +non-brokered path. + +Brokered list/pool commands still need the existing admin-token enforcement. +Either the command validates that before calling `List`, or +`CoordinatorLeaseBackend.List` returns the same missing-admin-token error. The +wrapper must not silently downgrade brokered pool/list to direct provider list. + +Initial coordinator modes: + +```text +hetzner supported +aws supported +ssh never +daytona never +blacksmith-testbox never +islo never +``` + +Daytona and Islo can gain broker support later by changing their spec and +implementing Worker-side provider support. That is out of scope for rebasing +the current PRs. 
+ +## Request And Result Types + +`Provider.Configure` is the only place that should receive full `Config`. +Provider modules should decode their typed config, create provider clients, and +store those on the configured backend. Requests then carry command intent, repo +state, and options. They should not carry `App`, and they should not carry full +`Config` unless a migration step still needs compatibility with old helpers. + +```go +type LeaseOptions struct { + TargetOS string + WindowsMode string + Class string + ServerType string + IdleTimeout time.Duration + TTL time.Duration + Desktop bool + Browser bool + Code bool + ActionsRunner bool + Tailscale TailscaleConfig + WorkRoot string + SSHUser string + SSHPort string + SSHKey string + Sync SyncConfig + Results ResultsConfig + EnvAllow []string +} + +type AcquireRequest struct { + Repo Repo + Options LeaseOptions + Keep bool + Reclaim bool +} + +type ResolveRequest struct { + Repo Repo + Options LeaseOptions + ID string + Reclaim bool +} + +type ReleaseLeaseRequest struct { + Lease LeaseTarget + Force bool +} + +type TouchRequest struct { + Lease LeaseTarget + State string + IdleTimeout time.Duration +} + +type ListRequest struct { + Options LeaseOptions +} + +type RunRequest struct { + Repo Repo + ID string + Options LeaseOptions + Keep bool + Reclaim bool + NoSync bool + SyncOnly bool + DebugSync bool + ShellMode bool + ChecksumSync bool + ForceSyncLarge bool + Command []string + TimingJSON bool +} + +type WarmupRequest struct { + Repo Repo + Options LeaseOptions + Keep bool + Reclaim bool + ActionsRunner bool + TimingJSON bool +} + +type StatusRequest struct { + Options LeaseOptions + ID string + Wait bool + WaitTimeout time.Duration +} + +type StopRequest struct { + Options LeaseOptions + ID string +} + +type RunResult struct { + ExitCode int + Command time.Duration + Total time.Duration + SyncDelegated bool +} +``` + +Core command code is responsible for converting CLI/config state into +`LeaseOptions` 
once. Backends should not re-read global command state or decode +raw provider config maps after `Configure`. + +`LeaseOptions` is intentionally broad for the migration. Direct provisioning +backends should usually care only about the provisioning subset, while the shared +SSH workflow consumes sync, result, and environment options. After the provider +split lands, consider splitting this into `ProvisionOptions` and `RunOptions`. + +`LeaseView` and `StatusView` are command-facing view models. They can wrap or +alias the existing `Server` and `statusView` during migration, but they must +carry redaction metadata for secret-bearing auth. Rendering is core-owned for +both backend kinds: `ListRequest` and `StatusRequest` do not carry JSON or human +format flags because backends return normalized views and core renders them. +`JSONListBackend` is a narrow compatibility escape hatch for existing +script-facing JSON schemas such as coordinator pool machines and Blacksmith +table rows; new providers should not need it. 
+ +Delegated providers should reject irrelevant sync options through a shared +helper: + +```go +func rejectDelegatedSyncOptions(provider string, req RunRequest) error { + if req.SyncOnly { + return exit(2, "provider=%s does not sync local files; --sync-only is not supported", provider) + } + if req.ChecksumSync { + return exit(2, "provider=%s does not sync local files; --checksum is not supported", provider) + } + if req.ForceSyncLarge { + return exit(2, "provider=%s does not sync local files; --force-sync-large is not supported", provider) + } + return nil +} +``` + +## Shared SSH Workflow + +`runCommand` should lose the provider lifecycle details and call one shared SSH +workflow: + +```go +func runOverSSHLease( + ctx context.Context, + backend SSHLeaseBackend, + lease LeaseTarget, + req RunRequest, + acquired bool, + rt Runtime, +) error +``` + +This workflow owns: + +- local claim/reclaim checks; +- coordinator recorder attachment when the backend is coordinator-wrapped; +- heartbeat/touch lifecycle through `backend.Touch`; +- Actions hydration marker detection; +- sync manifest creation, preflight, git seed, rsync/archive transfer, remote + prune, and sync finalize; +- POSIX/native Windows/WSL2 command wrapping; +- stdout/stderr streaming and run log buffering; +- JUnit result collection; +- timing summary and timing JSON; +- release through `backend.ReleaseLease` when `acquired && !req.Keep`. + +Providers must not copy this workflow. Daytona, Hetzner, AWS, and static SSH all +reuse it. + +`ReleaseLease` means "tear down the lease/resource for this specific command or +explicit stop." Background TTL/orphan cleanup is separate and belongs to +`CleanupBackend`. Static SSH can implement `ReleaseLease` as a no-op, but it +must not opt into cleanup. 
+ +## Lease Target And SSH Target + +Lease backends return: + +```go +type LeaseTarget struct { + Server Server + Target SSHTarget + LeaseID string + Options LeaseOptions +} +``` + +`Server` stays as the neutral provider resource for this refactor: + +```go +type Server struct { + CloudID string + Provider string + ID int64 + Name string + Status string + Labels map[string]string + PublicNet struct { IPv4 struct { IP string } } + ServerType struct { Name string } +} +``` + +`SSHTarget` needs explicit metadata for secret-bearing auth: + +```go +type SSHTarget struct { + User string + Host string + Key string + Port string + FallbackPorts []string + TargetOS string + WindowsMode string + ReadyCheck string + AuthSecret bool + NetworkKind NetworkMode +} +``` + +SSH rendering must omit `-i` when `Key == ""`. Human-readable status, list, +timing output, and normal JSON output must redact `User` when `AuthSecret` is +true. The only intended token-revealing surface is an explicit connect action +such as `crabbox ssh --provider daytona --id ...`. + +Daytona target example: + +```go +SSHTarget{ + User: token, + Host: "ssh.app.daytona.io", + Port: "22", + Key: "", + TargetOS: "linux", + ReadyCheck: "command -v git >/dev/null && command -v rsync >/dev/null && command -v tar >/dev/null", + AuthSecret: true, + NetworkKind: NetworkPublic, +} +``` + +Normal output: + +```text +ready ssh=<redacted>@ssh.app.daytona.io:22 network=public workroot=/home/daytona/crabbox +``` + +The actual interactive `crabbox ssh --provider daytona --id ...` command may +print a token-bearing connect command only because the user explicitly asked +for SSH access. + +## Provider State Contract + +Direct SSH lease providers should map provider resources into `Server` and use +Crabbox labels/tags when the provider supports metadata. 
+ +Required labels: + +```text +crabbox=true +provider=<name> +lease=<lease-id> +slug=<slug> +state=provisioning|leased|ready|running|released|failed +keep=true|false +target=linux|windows|macos +windows_mode=normal|wsl2 +server_type=<type> +created_at=<unix-seconds> +last_touched_at=<unix-seconds> +idle_timeout_secs=<seconds> +ttl_secs=<seconds> +expires_at=<unix-seconds> +``` + +Current direct providers write Unix seconds. The parser also accepts RFC3339 +and RFC3339Nano for compatibility with old or external records. Moving labels +to RFC3339 would be a behavior change and must update Hetzner/AWS tests and +docs together. + +Provider-specific labels must be documented: + +```text +provider_key=<key-name> # Hetzner/AWS direct key cleanup +market=spot|on-demand # AWS +work_root=<path> # Daytona restore/reuse path +``` + +If a provider lacks labels/tags, it must implement equivalent lookup and cleanup +semantics before enabling `FeatureCleanup`. + +## Config Model + +Long term, avoid adding a new top-level `FooConfig` field for every provider. +Use a provider config bag: + +```go +type Config struct { + Provider string + Providers map[string]ProviderConfig + + // Compatibility fields kept while migrating existing config. + Blacksmith BlacksmithConfig + Static StaticConfig +} + +type ProviderConfig map[string]any +``` + +YAML: + +```yaml +provider: daytona + +providers: + daytona: + snapshot: crabbox-ready + target: us + user: daytona + workRoot: /home/daytona/crabbox + sshTokenMinutes: 15 + + islo: + image: docker.io/library/ubuntu:24.04 + workdir: /workspace/crabbox + gatewayProfile: default +``` + +Compatibility: + +- Keep `blacksmith:` while existing configs migrate. +- Keep `static:` because static SSH is already documented and special. +- Daytona and Islo should prefer `providers.daytona` and `providers.islo`. +- Provider modules should expose typed config accessors so command code never + decodes raw maps. 
+ +Example helper shape: + +```go +func DecodeProviderConfig(cfg Config, name string, defaults any, out any) error { + raw := cfg.Providers[name] + if raw == nil { + return copyDefaultProviderConfig(defaults, out) + } + return decodeProviderConfig(raw, defaults, out) +} +``` + +Provider credentials stay in environment or native provider auth stores, not +repo YAML: + +```text +HCLOUD_TOKEN +HETZNER_TOKEN +AWS_PROFILE +AWS_REGION +DAYTONA_API_KEY +DAYTONA_JWT_TOKEN +DAYTONA_ORGANIZATION_ID +DAYTONA_API_URL +ISLO_API_KEY +ISLO_BASE_URL +``` + +## Built-In vs External Plugins + +The first refactor keeps providers compiled into the Go binary. + +Do not use Go `plugin.Open`. Go plugins require matching Go versions, module +versions, architecture, and build flags. They cannot be unloaded, init code runs +on load, and cross-platform support is poor. + +If runtime extension is needed later, use an external process protocol: + +```yaml +provider: my-runner +providers: + my-runner: + kind: command + command: crabbox-provider-my-runner +``` + +The adapter can speak JSON over stdio: + +```json +{"method":"spec","params":{}} +{"method":"warmup","params":{"config":{},"keep":true}} +{"method":"run","params":{"id":"...","command":["go","test","./..."]}} +{"method":"status","params":{"id":"...","wait":false}} +{"method":"stop","params":{"id":"..."}} +``` + +This lets TypeScript or Python SDK adapters exist later without making the core +binary load native plugins. + +## Provider Mapping + +### Hetzner + +Backend: `SSHLeaseBackend` + +Spec: + +```text +kind=ssh-lease +coordinator=supported +targets=linux +features=ssh, crabbox-sync, cleanup, tailscale +``` + +Owns direct mode: + +- `HCLOUD_TOKEN` / `HETZNER_TOKEN` auth; +- SSH key import/delete; +- server create/list/get/delete; +- labels; +- class fallback; +- direct cleanup. + +Reuses core: + +- coordinator wrapper when configured; +- SSH sync/run; +- claims; +- status rendering; +- cleanup policy. 
+ +### AWS + +Backend: `SSHLeaseBackend` + +Spec: + +```text +kind=ssh-lease +coordinator=supported +targets=linux, windows/normal, windows/wsl2, macos +features=ssh, crabbox-sync, cleanup, desktop, browser, code +``` + +Owns direct mode: + +- AWS SDK config and region selection; +- key pair import/delete; +- AMI resolution; +- security group setup; +- EC2 launch/list/get/terminate; +- Spot/On-Demand fallback; +- Windows/macOS launch options; +- tags; +- direct cleanup. + +Reuses core: + +- coordinator wrapper when configured; +- SSH sync/run; +- native Windows archive sync and command wrapping; +- claims; +- status rendering; +- cleanup policy. + +### Static SSH + +Backend: `SSHLeaseBackend` + +Spec: + +```text +kind=ssh-lease +coordinator=never +targets=linux, windows/normal, windows/wsl2, macos +features=ssh, crabbox-sync, desktop, browser, code +``` + +Owns: + +- static config to `LeaseTarget` mapping; +- static claim behavior; +- no-op release. + +Does not support provider cleanup or coordinator. + +### Daytona + +Backend: `SSHLeaseBackend` + +Spec: + +```text +kind=ssh-lease +coordinator=never +targets=linux +features=ssh, crabbox-sync, cleanup +``` + +Owns: + +- REST API auth and organization header; +- sandbox create/list/get/start/stop/delete; +- labels and last-activity touch; +- SSH access token minting; +- Daytona sandbox to `Server` mapping; +- secret SSH user and public relay target metadata. + +Reuses core: + +- SSH sync/run; +- claims; +- status rendering; +- cleanup policy. + +Initial constraints: + +- Linux only. +- No coordinator. +- No Tailscale. +- No VNC/screenshot/desktop/browser/code portal. +- No Actions runner. +- Snapshot mode only unless image mode is implemented fully. + +Rebase notes for https://github.com/openclaw/crabbox/pull/32: + +- Implement `Provider.Configure` returning a Daytona `SSHLeaseBackend`. +- Keep raw REST instead of the official Daytona Go SDK. 
+- Keep the labels body fix: Daytona label update expects + `{ "labels": { ... } }`. +- Keep start-before-SSH for stopped sandboxes. +- Require `DAYTONA_ORGANIZATION_ID` when JWT auth is used unless Daytona docs + prove it is optional for the account shape. +- Do not expose CPU/memory/disk flags while snapshot mode makes them unusable. +- Keep token redaction tests. + +### Blacksmith Testbox + +Backend: `DelegatedRunBackend` + +Spec: + +```text +kind=delegated-run +coordinator=never +targets=provider-owned linux +features=delegated execution +``` + +Owns: + +- Blacksmith CLI command construction; +- warmup/run/list/status/stop; +- Testbox SSH key storage for Blacksmith CLI, through injected filesystem/runtime + helpers; +- provider-specific claim ID resolution; +- delegated timing summaries. + +Does not support Crabbox rsync, `--sync-only`, VNC/screenshot/desktop through +Crabbox, or coordinator. + +### Islo + +Backend: `DelegatedRunBackend` + +Spec: + +```text +kind=delegated-run +coordinator=never +targets=provider-owned linux +features=delegated execution +``` + +Owns: + +- SDK auth and token refresh; +- sandbox create/list/get/delete; +- command execution through provider API; +- SSE parsing for live stdout/stderr; +- Islo lease ID and sandbox name mapping; +- delegated timing summaries. + +Does not support Crabbox rsync, `--sync-only`, `--checksum`, +`--force-sync-large`, VNC/screenshot/desktop/browser through Crabbox, Actions +runner, or coordinator. + +Rebase notes for https://github.com/openclaw/crabbox/pull/24: + +- Implement `Provider.Configure` returning an Islo `DelegatedRunBackend`. +- Keep the small Go SDK dependency if the provider is accepted. +- Keep the custom SSE consumer; the SDK stream method does not expose a clean + streaming API today. +- Validate `ISLO_API_KEY` before SDK calls. +- Keep `ISLO_BASE_URL` as the only base URL override. +- Keep delegated option rejection tests. 
+ +## Migration Plan + +### Phase 1: Registry And Specs + +- Add provider registry. +- Add `Provider`, `Backend`, `ProviderSpec`, and feature/target types. +- Register existing providers as built-ins. +- Keep current command behavior. +- Register all built-in provider flags before `flag.Parse`. + +Expected behavior change: none. + +Status: implemented for existing built-in providers in +`internal/providers/`. + +### Phase 2: Backend Loading + +- Add `Runtime`. +- Add `Provider.Configure`. +- Add `loadBackend`. +- Add fake backend tests for command dispatch. +- Keep old provider helper functions temporarily. + +Expected behavior change: none. + +Status: implemented. `loadBackend(cfg Config, rt Runtime)` intentionally does +not accept `flag.FlagSet` or raw command args. + +### Phase 3: Coordinator Wrapper + +- Add `CoordinatorLeaseBackend`. +- Wrap Hetzner/AWS SSH lease backends when coordinator is configured. +- Prove logged-in/configured users still go through the broker. +- Keep direct Hetzner/AWS when coordinator is disabled. + +Expected behavior change: none. + +Status: implemented for Hetzner/AWS coordinator-backed leases. + +### Phase 4: Extract Shared SSH Workflow + +- Extract `runOverSSHLease`. +- Route Hetzner, AWS, and static SSH through `SSHLeaseBackend`. +- Preserve heartbeat, recorder, release, sync, Windows archive sync, and JUnit + behavior. +- Add fake SSH backend tests before rebasing any new provider. These tests should + prove acquire, resolve-by-id, claim/reclaim, sync-only, heartbeat/touch, + timing JSON, release-on-non-keep, and run-recorder behavior without hitting a + real provider. + +Expected behavior change: none. + +Status: implemented for existing SSH providers. New provider PRs should add fake +backend tests before adding live-only coverage. + +### Phase 5: Convert Delegated Providers + +- Move Blacksmith into a `DelegatedRunBackend`. +- Centralize delegated sync-option rejection. 
+- Dispatch `warmup`, `run`, `list`, `status`, and `stop` through backend shape. + +Expected behavior change: none. + +Status: implemented for Blacksmith Testbox. + +### Phase 6: Provider Config Bag + +- Add `providers:` YAML parsing. +- Add typed provider config decoders. +- Keep existing `blacksmith:` and `static:` compatibility. +- Prefer `providers.<name>` for new providers and docs. + +Expected behavior change: none for existing configs. + +### Phase 7: Rebase Daytona + +- Rebase https://github.com/openclaw/crabbox/pull/32 onto `SSHLeaseBackend`. +- Keep Daytona REST client isolated. +- Add tests for acquire/resolve/list/release/touch via backend. +- Add redaction tests for secret SSH user output. +- Add live smoke behind explicit env gates only. + +Expected behavior change: new provider. + +### Phase 8: Rebase Islo + +- Rebase https://github.com/openclaw/crabbox/pull/24 onto + `DelegatedRunBackend`. +- Keep SDK seam injectable. +- Keep SSE parser tests. +- Add delegated option rejection tests. +- Add live smoke behind explicit env gates only. + +Expected behavior change: new provider if product decision is yes. + +### Phase 9: Remove Compatibility Branches + +- Remove direct command references to `isBlacksmithProvider`, + `isDaytonaProvider`, and `isIsloProvider`. +- Replace remaining static checks with canonical provider/spec checks where + practical. +- Update `docs/source-map.md` and provider feature docs. + +Expected behavior change: none. + +## Tests + +Registry and flag tests: + +- canonical lookup; +- alias lookup; +- duplicate registration panic; +- unknown provider error; +- provider help string includes built-ins; +- built-in provider flags are accepted before provider selection; +- non-selected provider flags parse but are ignored. 
+ +Spec and capability tests: + +- target OS and Windows mode validation per provider; +- unsupported desktop/browser/code provider features and Actions runner + capability errors; +- coordinator wrapper selected for Hetzner/AWS when configured; +- direct backend selected for static, Daytona, and coordinator-disabled + Hetzner/AWS. + +SSH workflow tests: + +- fake SSH backend acquire path enters shared sync/run; +- fake SSH backend resolve path enters shared sync/run; +- touch transitions go through backend; +- release happens on acquired non-keep lease; +- no provider-specific command branch is needed for fake SSH backend. + +Delegated backend tests: + +- fake delegated backend receives warmup/run/list/status/stop requests; +- delegated sync flags are rejected; +- nonzero exit code propagates; +- Blacksmith command execution goes through injected `CommandRunner`, not + package-level `exec.Command`; +- Blacksmith Testbox SSH key storage goes through injected filesystem/runtime + helpers where practical. + +Daytona tests: + +- auth env validation; +- organization header behavior; +- create body shape; +- labels body shape; +- snapshot mode omits unusable resource overrides; +- stopped sandbox starts before SSH target creation; +- SSH target uses relay host, empty key, secret user, public network, and ready + check; +- list/status/timing output, including JSON, redacts token-bearing user; +- release removes local claim. + +Islo tests: + +- SDK factory rejects missing `ISLO_API_KEY`; +- SDK client maps create/get/list/delete; +- SSE parser handles stdout/stderr/exit events; +- run streams output and propagates exit code; +- status wait polls and times out; +- stop removes local claim. + +Docs tests: + +- provider docs link from `docs/features/providers.md`; +- `docs/source-map.md` lists provider implementation files; +- command docs mention provider list consistently. + +## Acceptance Criteria + +- `go test ./...` passes. 
+- Existing providers keep working: + - `crabbox warmup --provider hetzner` + - `crabbox run --provider aws` + - `crabbox run --provider ssh` + - `crabbox run --provider blacksmith-testbox` +- A fake SSH lease backend can be tested without editing command handlers. +- A fake delegated backend can be tested without editing command handlers. +- Hetzner/AWS still use the coordinator when configured. +- Daytona can be rebased by implementing `SSHLeaseBackend`. +- Islo can be rebased by implementing `DelegatedRunBackend`. +- No new provider requires touching the main command flow unless it adds a new + top-level Crabbox feature. +- Normal list/status/timing output, including JSON, never prints secret SSH users + or provider API credentials. + +## Open Questions + +- Should `Server` become `Machine` after providers no longer all create + servers? +- Should `providers.<name>` become the only provider config namespace in a + future major release? +- Should external command providers use a small Crabbox JSON protocol or MCP? + The smaller JSON protocol is preferred for now. +- Should Daytona support image mode and resource overrides, or stay snapshot + only? +- Should Islo be accepted as a built-in provider at all, given the product-fit + concerns from the closed PRs? diff --git a/docs/source-map.md b/docs/source-map.md index c898149..8dd6a23 100644 --- a/docs/source-map.md +++ b/docs/source-map.md @@ -36,11 +36,22 @@ This page maps user-facing behavior back to implementation files.
Keep docs desc - Direct Hetzner provider: `internal/cli/hcloud.go` - Direct AWS provider: `internal/cli/aws.go` - Static SSH macOS/Windows provider: `internal/cli/static.go` -- Blacksmith Testbox CLI wrapper: `internal/cli/blacksmith.go` +- Blacksmith Testbox argument/parsing helpers: `internal/cli/blacksmith.go` +- Provider backend interfaces, registry, and request/result types: + `internal/cli/provider_backend.go` +- Built-in provider registration packages: + `internal/providers/hetzner`, `internal/providers/aws`, + `internal/providers/ssh`, `internal/providers/blacksmith`, + `internal/providers/all` +- Built-in provider backend implementations: + `internal/cli/providers_common.go`, `internal/cli/provider_aws.go`, + `internal/cli/provider_hetzner.go`, `internal/cli/provider_static.go`, + `internal/cli/provider_coordinator.go`, `internal/cli/provider_blacksmith.go` - Worker Hetzner provider: `worker/src/hetzner.ts` - Worker AWS EC2 provider: `worker/src/aws.ts` - Worker AWS AMI create/read/promote routes: `worker/src/fleet.ts`, `worker/src/aws.ts` - Provider feature docs: `docs/features/aws.md`, `docs/features/hetzner.md`, `docs/features/blacksmith-testbox.md` +- Provider/backend authoring guide: `docs/provider-backends.md` - CLI cloud-init bootstrap: `internal/cli/bootstrap.go` - Worker cloud-init bootstrap: `worker/src/bootstrap.ts` - Tailscale feature contract: `docs/features/tailscale.md` diff --git a/internal/cli/actions.go b/internal/cli/actions.go index b75e4b5..fd538c2 100644 --- a/internal/cli/actions.go +++ b/internal/cli/actions.go @@ -78,13 +78,18 @@ func (a App) actionsHydrate(ctx context.Context, args []string) error { if err := claimLeaseForRepoConfig(leaseID, slug, cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { return err } - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { return err - } else if ok { + } + if coord := backendCoordinator(backend); 
coord != nil { stopHeartbeat := startCoordinatorHeartbeat(ctx, coord, leaseID, cfg.IdleTimeout, nil, a.Stderr) defer stopHeartbeat() - } else { - a.touchActiveLeaseBestEffort(ctx, cfg, server, leaseID) + } else if sshBackend, ok := backend.(SSHLeaseBackend); ok { + _, err := sshBackend.Touch(ctx, TouchRequest{Lease: LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, State: blank(server.Labels["state"], "ready"), IdleTimeout: cfg.IdleTimeout}) + if err != nil { + fmt.Fprintf(a.Stderr, "warning: touch failed for %s: %v\n", leaseID, err) + } } label := githubActionsLeaseLabel(leaseID) if err := a.registerGitHubActionsRunner(ctx, cfg, target, leaseID, slug, ghRepo, "", nil); err != nil { @@ -186,29 +191,15 @@ func (a App) actionsRegister(ctx context.Context, args []string) error { if err != nil { return err } - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { - return err - } else if ok { - lease, err := coord.GetLease(ctx, *leaseIDFlag) - if err != nil { - return err - } - _, target, leaseID := leaseToServerTarget(lease, cfg) - if err := claimLeaseForRepoConfig(leaseID, lease.Slug, cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { - return err - } - a.touchCoordinatorLeaseBestEffort(ctx, cfg, leaseID) - return a.registerGitHubActionsRunner(ctx, cfg, target, leaseID, lease.Slug, ghRepo, *nameFlag, extraLabels) - } - server, target, leaseID, err := a.findLease(ctx, cfg, *leaseIDFlag) + server, target, leaseID, slug, err := a.resolveLeaseTargetForActions(ctx, cfg, *leaseIDFlag) if err != nil { return err } - if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { + if err := claimLeaseForRepoConfig(leaseID, slug, cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { return err } - a.touchActiveLeaseBestEffort(ctx, cfg, server, leaseID) - return a.registerGitHubActionsRunner(ctx, cfg, target, leaseID, serverSlug(server), ghRepo, *nameFlag, extraLabels) + 
a.touchLeaseTargetBestEffort(ctx, cfg, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, "") + return a.registerGitHubActionsRunner(ctx, cfg, target, leaseID, slug, ghRepo, *nameFlag, extraLabels) } func (a App) actionsDispatch(ctx context.Context, args []string) error { @@ -277,17 +268,7 @@ func (a App) registerGitHubActionsRunner(ctx context.Context, cfg Config, target } func (a App) resolveLeaseTargetForActions(ctx context.Context, cfg Config, id string) (Server, SSHTarget, string, string, error) { - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { - return Server{}, SSHTarget{}, "", "", err - } else if ok { - lease, err := coord.GetLease(ctx, id) - if err != nil { - return Server{}, SSHTarget{}, "", "", err - } - server, target, leaseID := leaseToServerTarget(lease, cfg) - return server, target, leaseID, lease.Slug, nil - } - server, target, leaseID, err := a.findLease(ctx, cfg, id) + server, target, leaseID, err := a.resolveLeaseTarget(ctx, cfg, id) return server, target, leaseID, serverSlug(server), err } diff --git a/internal/cli/blacksmith.go b/internal/cli/blacksmith.go index db4dd77..dc339d7 100644 --- a/internal/cli/blacksmith.go +++ b/internal/cli/blacksmith.go @@ -1,13 +1,8 @@ package cli import ( - "bytes" - "context" - "encoding/json" "flag" "fmt" - "io" - "os/exec" "regexp" "strings" "time" @@ -16,25 +11,12 @@ import ( const blacksmithTestboxProvider = "blacksmith-testbox" var ( - blacksmithCommandContext = exec.CommandContext blacksmithIDPattern = regexp.MustCompile(`\btbx_[A-Za-z0-9_-]+\b`) blacksmithCleanupAttempts = 36 blacksmithCleanupDelay = 5 * time.Second blacksmithCleanupQuiet = 12 ) -type blacksmithRunOptions struct { - ID string - Keep bool - Reclaim bool - SyncOnly bool - Debug bool - ShellMode bool - Command []string - IdleTimeout time.Duration - TimingJSON bool -} - type blacksmithFlagValues struct { Org *string Workflow *string @@ -80,204 +62,6 @@ func applyBlacksmithFlagOverrides(cfg *Config, fs 
*flag.FlagSet, values blacksmi } } -func (a App) blacksmithWarmup(ctx context.Context, cfg Config, repo Repo, keep, reclaim, timingJSON bool) error { - started := time.Now() - leaseID, slug, err := a.blacksmithWarmupLease(ctx, cfg, repo, reclaim) - if err != nil { - return err - } - fmt.Fprintf(a.Stdout, "leased %s slug=%s provider=%s idle_timeout=%s\n", leaseID, slug, blacksmithTestboxProvider, blacksmithIdleTimeout(cfg)) - if !keep { - fmt.Fprintf(a.Stderr, "warning: blacksmith warmup keeps the testbox until idle timeout or explicit stop\n") - } - fmt.Fprintf(a.Stdout, "warmup complete total=%s\n", time.Since(started).Round(time.Millisecond)) - if timingJSON { - total := time.Since(started) - if err := writeTimingJSON(a.Stderr, timingReport{ - Provider: blacksmithTestboxProvider, - LeaseID: leaseID, - Slug: slug, - TotalMs: total.Milliseconds(), - ExitCode: 0, - }); err != nil { - return err - } - } - return nil -} - -func (a App) blacksmithRun(ctx context.Context, cfg Config, repo Repo, opts blacksmithRunOptions) error { - if opts.SyncOnly { - return exit(2, "blacksmith-testbox delegates sync to Blacksmith; --sync-only is not supported") - } - started := time.Now() - leaseID := opts.ID - acquired := false - var err error - if leaseID == "" { - leaseID, _, err = a.blacksmithWarmupLease(ctx, cfg, repo, opts.Reclaim) - if err != nil { - return err - } - acquired = true - } else { - leaseID, err = resolveBlacksmithLeaseID(leaseID, repo.Root, opts.Reclaim) - if err != nil { - return err - } - slug, err := blacksmithClaimSlug(opts.ID, leaseID) - if err != nil { - return err - } - if err := claimLeaseForRepoProvider(leaseID, slug, blacksmithTestboxProvider, repo.Root, opts.IdleTimeout, opts.Reclaim); err != nil { - return err - } - } - if acquired && !opts.Keep { - defer func() { - if err := a.blacksmithStopLease(context.Background(), cfg, leaseID); err != nil { - fmt.Fprintf(a.Stderr, "warning: blacksmith stop failed for %s: %v\n", leaseID, err) - return - } - 
removeLeaseClaim(leaseID) - removeStoredTestboxKey(leaseID) - }() - } - fmt.Fprintf(a.Stderr, "provider=blacksmith-testbox id=%s sync=delegated auth=blacksmith\n", leaseID) - commandStart := time.Now() - code := a.runBlacksmithTestbox(ctx, cfg, leaseID, opts.Command, opts.Debug, opts.ShellMode) - commandDuration := time.Since(commandStart) - total := time.Since(started) - fmt.Fprintf(a.Stderr, "blacksmith run summary sync=delegated command=%s total=%s exit=%d\n", commandDuration.Round(time.Millisecond), total.Round(time.Millisecond), code) - if opts.TimingJSON { - if err := writeTimingJSON(a.Stderr, timingReport{ - Provider: blacksmithTestboxProvider, - LeaseID: leaseID, - SyncPhases: []timingPhase{{Name: "delegated", Skipped: true, Reason: "blacksmith-testbox owns sync"}}, - SyncDelegated: true, - CommandMs: commandDuration.Milliseconds(), - TotalMs: total.Milliseconds(), - ExitCode: code, - }); err != nil { - return err - } - } - if code != 0 { - return ExitError{Code: code, Message: fmt.Sprintf("blacksmith testbox run exited %d", code)} - } - return nil -} - -func (a App) blacksmithList(ctx context.Context, cfg Config, jsonOut bool) error { - args := blacksmithListArgs(cfg) - if !jsonOut { - return a.streamBlacksmith(ctx, args) - } - cmd := blacksmithCommandContext(ctx, "blacksmith", args...) 
- out, err := cmd.CombinedOutput() - if err != nil { - return ExitError{Code: exitCode(err), Message: fmt.Sprintf("blacksmith failed: %v: %s", err, strings.TrimSpace(string(out)))} - } - return json.NewEncoder(a.Stdout).Encode(parseBlacksmithList(string(out))) -} - -func (a App) blacksmithStatus(ctx context.Context, cfg Config, id string, wait bool, waitTimeout time.Duration, jsonOut bool) error { - if jsonOut { - return exit(2, "blacksmith-testbox status does not support --json") - } - leaseID, err := resolveBlacksmithLeaseID(id, "", false) - if err != nil { - return err - } - return a.streamBlacksmith(ctx, blacksmithStatusArgs(cfg, leaseID, wait, waitTimeout)) -} - -func (a App) blacksmithStop(ctx context.Context, cfg Config, id string) error { - leaseID, err := resolveBlacksmithLeaseID(id, "", false) - if err != nil { - return err - } - if err := a.blacksmithStopLease(ctx, cfg, leaseID); err != nil { - return err - } - removeLeaseClaim(leaseID) - removeStoredTestboxKey(leaseID) - return nil -} - -func (a App) blacksmithWarmupLease(ctx context.Context, cfg Config, repo Repo, reclaim bool) (string, string, error) { - pendingID := "tbx_pending_" + strings.TrimPrefix(newLeaseID(), "cbx_") - cleanupKeyID := pendingID - defer func() { - if cleanupKeyID != "" { - removeStoredTestboxKey(cleanupKeyID) - } - }() - _, publicKey, err := ensureTestboxKey(pendingID) - if err != nil { - return "", "", err - } - args, err := blacksmithWarmupArgs(cfg, publicKey) - if err != nil { - return "", "", err - } - beforeWarmup := a.blacksmithListIDsBestEffort(ctx, cfg) - var output bytes.Buffer - cmd := blacksmithCommandContext(ctx, "blacksmith", args...) 
- cmd.Stdout = io.MultiWriter(a.Stdout, &output) - cmd.Stderr = io.MultiWriter(a.Stderr, &output) - if err := cmd.Run(); err != nil { - a.cleanupFailedBlacksmithWarmup(ctx, cfg, beforeWarmup, output.String()) - return "", "", exit(exitCode(err), "blacksmith testbox warmup failed: %v", err) - } - leaseID := parseBlacksmithID(output.String()) - if leaseID == "" { - return "", "", exit(5, "blacksmith testbox warmup did not print a tbx_ id") - } - if err := moveStoredTestboxKey(pendingID, leaseID); err != nil { - _ = a.blacksmithStopLease(ctx, cfg, leaseID) - return "", "", exit(2, "store blacksmith key for %s: %v", leaseID, err) - } - cleanupKeyID = leaseID - slug := newLeaseSlug(leaseID) - if err := claimLeaseForRepoProvider(leaseID, slug, blacksmithTestboxProvider, repo.Root, blacksmithIdleTimeout(cfg), reclaim); err != nil { - _ = a.blacksmithStopLease(ctx, cfg, leaseID) - return "", "", err - } - cleanupKeyID = "" - return leaseID, slug, nil -} - -func (a App) runBlacksmithTestbox(ctx context.Context, cfg Config, leaseID string, command []string, debug, shellMode bool) int { - keyPath, err := testboxKeyPath(leaseID) - if err != nil { - fmt.Fprintf(a.Stderr, "blacksmith key path failed: %v\n", err) - return 2 - } - args := blacksmithRunArgs(cfg, leaseID, keyPath, command, debug || cfg.Blacksmith.Debug, shellMode) - cmd := blacksmithCommandContext(ctx, "blacksmith", args...) - cmd.Stdout = a.Stdout - cmd.Stderr = a.Stderr - if err := cmd.Run(); err != nil { - return exitCode(err) - } - return 0 -} - -func (a App) blacksmithStopLease(ctx context.Context, cfg Config, leaseID string) error { - return a.streamBlacksmith(ctx, blacksmithStopArgs(cfg, leaseID)) -} - -func (a App) streamBlacksmith(ctx context.Context, args []string) error { - cmd := blacksmithCommandContext(ctx, "blacksmith", args...) 
- cmd.Stdout = a.Stdout - cmd.Stderr = a.Stderr - if err := cmd.Run(); err != nil { - return ExitError{Code: exitCode(err), Message: fmt.Sprintf("blacksmith failed: %v", err)} - } - return nil -} - func blacksmithWarmupArgs(cfg Config, publicKey string) ([]string, error) { workflow := blacksmithWorkflow(cfg) if workflow == "" { @@ -334,61 +118,6 @@ func blacksmithListAllArgs(cfg Config) []string { return append(blacksmithListArgs(cfg), "--all") } -func (a App) blacksmithListIDsBestEffort(ctx context.Context, cfg Config) map[string]bool { - out, err := blacksmithCommandOutput(ctx, cfg, blacksmithListAllArgs(cfg)) - if err != nil { - return map[string]bool{} - } - ids := map[string]bool{} - for _, item := range parseBlacksmithList(out) { - ids[item.ID] = true - } - return ids -} - -func (a App) cleanupFailedBlacksmithWarmup(ctx context.Context, cfg Config, before map[string]bool, output string) { - if leaseID := parseBlacksmithID(output); leaseID != "" { - if err := a.blacksmithStopLease(ctx, cfg, leaseID); err == nil { - before[leaseID] = true - } - } - stoppedAny := false - quietAttempts := 0 - for attempt := 0; attempt < blacksmithCleanupAttempts; attempt++ { - if attempt > 0 { - select { - case <-ctx.Done(): - return - case <-time.After(blacksmithCleanupDelay): - } - } - list, err := blacksmithCommandOutput(ctx, cfg, blacksmithListAllArgs(cfg)) - if err != nil { - return - } - stopped := false - for _, item := range parseBlacksmithList(list) { - if before[item.ID] || !blacksmithMatchesConfig(item, cfg) { - continue - } - _ = a.blacksmithStopLease(ctx, cfg, item.ID) - before[item.ID] = true - stopped = true - } - if stopped { - stoppedAny = true - quietAttempts = 0 - continue - } - if stoppedAny { - quietAttempts++ - if quietAttempts >= blacksmithCleanupQuiet { - return - } - } - } -} - func blacksmithMatchesConfig(item blacksmithListItem, cfg Config) bool { if workflow := blacksmithWorkflow(cfg); workflow != "" && item.Workflow != workflow { return false @@ 
-402,15 +131,6 @@ func blacksmithMatchesConfig(item blacksmithListItem, cfg Config) bool { return true } -func blacksmithCommandOutput(ctx context.Context, cfg Config, args []string) (string, error) { - cmd := blacksmithCommandContext(ctx, "blacksmith", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", err - } - return string(out), nil -} - func parseBlacksmithList(output string) []blacksmithListItem { items := []blacksmithListItem{} for _, line := range strings.Split(output, "\n") { diff --git a/internal/cli/blacksmith_test.go b/internal/cli/blacksmith_test.go index 0ae7766..15edb7c 100644 --- a/internal/cli/blacksmith_test.go +++ b/internal/cli/blacksmith_test.go @@ -2,9 +2,9 @@ package cli import ( "context" + "errors" "io" "os" - "os/exec" "path/filepath" "reflect" "strings" @@ -12,6 +12,27 @@ import ( "time" ) +type blacksmithFuncRunner struct { + calls [][]string + fn func(LocalCommandRequest) (LocalCommandResult, error) +} + +func (r *blacksmithFuncRunner) Run(_ context.Context, req LocalCommandRequest) (LocalCommandResult, error) { + r.calls = append(r.calls, append([]string(nil), req.Args...)) + if r.fn != nil { + return r.fn(req) + } + return LocalCommandResult{}, nil +} + +func newTestBlacksmithBackend(cfg Config, runner CommandRunner) *blacksmithBackend { + return &blacksmithBackend{ + spec: testBlacksmithProvider{}.Spec(), + cfg: cfg, + rt: Runtime{Stdout: io.Discard, Stderr: io.Discard, Clock: realClock{}, Exec: runner}, + } +} + func TestBlacksmithWarmupArgs(t *testing.T) { cfg := baseConfig() cfg.Blacksmith = BlacksmithConfig{ @@ -66,18 +87,14 @@ func TestBlacksmithWarmupFailureRemovesPendingKey(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) - original := blacksmithCommandContext - blacksmithCommandContext = func(context.Context, string, ...string) *exec.Cmd { - return exec.Command("sh", "-c", "exit 1") - } - t.Cleanup(func() { - blacksmithCommandContext 
= original - }) + runner := &blacksmithFuncRunner{fn: func(LocalCommandRequest) (LocalCommandResult, error) { + return LocalCommandResult{ExitCode: 1}, errors.New("exit status 1") + }} cfg := baseConfig() cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" - app := App{Stdout: io.Discard, Stderr: io.Discard} - _, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false) + backend := newTestBlacksmithBackend(cfg, runner) + _, _, err := backend.warmupLease(context.Background(), Repo{Root: "/repo"}, false) if err == nil { t.Fatal("expected warmup failure") } @@ -98,27 +115,23 @@ func TestBlacksmithWarmupFailureStopsPrintedTestbox(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) - original := blacksmithCommandContext var stopped string - blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { - if len(args) >= 3 && args[0] == "testbox" && args[1] == "stop" { - for i, arg := range args { - if arg == "--id" && i+1 < len(args) { - stopped = args[i+1] + runner := &blacksmithFuncRunner{fn: func(req LocalCommandRequest) (LocalCommandResult, error) { + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "stop" { + for i, arg := range req.Args { + if arg == "--id" && i+1 < len(req.Args) { + stopped = req.Args[i+1] } } - return exec.Command("sh", "-c", "exit 0") + return LocalCommandResult{}, nil } - return exec.Command("sh", "-c", "printf 'queued tbx_leaked123\\n'; exit 1") - } - t.Cleanup(func() { - blacksmithCommandContext = original - }) + return LocalCommandResult{ExitCode: 1, Stdout: "queued tbx_leaked123\n"}, errors.New("exit status 1") + }} cfg := baseConfig() cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" - app := App{Stdout: io.Discard, Stderr: io.Discard} - _, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false) + backend := newTestBlacksmithBackend(cfg, runner) + 
_, _, err := backend.warmupLease(context.Background(), Repo{Root: "/repo"}, false) if err == nil { t.Fatal("expected warmup failure") } @@ -131,7 +144,6 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) - original := blacksmithCommandContext originalDelay := blacksmithCleanupDelay originalAttempts := blacksmithCleanupAttempts originalQuiet := blacksmithCleanupQuiet @@ -140,26 +152,25 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { blacksmithCleanupQuiet = 1 var stopped string listCalls := 0 - blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { - if len(args) >= 3 && args[0] == "testbox" && args[1] == "list" { + runner := &blacksmithFuncRunner{fn: func(req LocalCommandRequest) (LocalCommandResult, error) { + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "list" { listCalls++ if listCalls < 3 { - return exec.Command("sh", "-c", "printf 'ID STATUS REPO WORKFLOW JOB REF CREATED\\n'") + return LocalCommandResult{Stdout: "ID STATUS REPO WORKFLOW JOB REF CREATED\n"}, nil } - return exec.Command("sh", "-c", "printf 'tbx_async123 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:47.000000Z\\n'") + return LocalCommandResult{Stdout: "tbx_async123 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:47.000000Z\n"}, nil } - if len(args) >= 3 && args[0] == "testbox" && args[1] == "stop" { - for i, arg := range args { - if arg == "--id" && i+1 < len(args) { - stopped = args[i+1] + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "stop" { + for i, arg := range req.Args { + if arg == "--id" && i+1 < len(req.Args) { + stopped = req.Args[i+1] } } - return exec.Command("sh", "-c", "exit 0") + return LocalCommandResult{}, nil } - return exec.Command("sh", "-c", "printf 'workflow missing\\n'; exit 1") - } + return 
LocalCommandResult{ExitCode: 1, Stdout: "workflow missing\n"}, errors.New("exit status 1") + }} t.Cleanup(func() { - blacksmithCommandContext = original blacksmithCleanupDelay = originalDelay blacksmithCleanupAttempts = originalAttempts blacksmithCleanupQuiet = originalQuiet @@ -169,8 +180,8 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" cfg.Blacksmith.Job = "check" cfg.Blacksmith.Ref = "main" - app := App{Stdout: io.Discard, Stderr: io.Discard} - _, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false) + backend := newTestBlacksmithBackend(cfg, runner) + _, _, err := backend.warmupLease(context.Background(), Repo{Root: "/repo"}, false) if err == nil { t.Fatal("expected warmup failure") } @@ -183,7 +194,6 @@ func TestBlacksmithWarmupFailureContinuesAfterFirstDelayedStop(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home) t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) - original := blacksmithCommandContext originalDelay := blacksmithCleanupDelay originalAttempts := blacksmithCleanupAttempts originalQuiet := blacksmithCleanupQuiet @@ -192,30 +202,29 @@ func TestBlacksmithWarmupFailureContinuesAfterFirstDelayedStop(t *testing.T) { blacksmithCleanupQuiet = 1 stopped := []string{} listCalls := 0 - blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { - if len(args) >= 3 && args[0] == "testbox" && args[1] == "list" { + runner := &blacksmithFuncRunner{fn: func(req LocalCommandRequest) (LocalCommandResult, error) { + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "list" { listCalls++ switch listCalls { case 2: - return exec.Command("sh", "-c", "printf 'tbx_delayed1 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:47.000000Z\\n'") + return LocalCommandResult{Stdout: "tbx_delayed1 queued openclaw .github/workflows/testbox.yml check main 
2026-05-04T21:23:47.000000Z\n"}, nil case 3: - return exec.Command("sh", "-c", "printf 'tbx_delayed2 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:48.000000Z\\n'") + return LocalCommandResult{Stdout: "tbx_delayed2 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:48.000000Z\n"}, nil default: - return exec.Command("sh", "-c", "printf 'ID STATUS REPO WORKFLOW JOB REF CREATED\\n'") + return LocalCommandResult{Stdout: "ID STATUS REPO WORKFLOW JOB REF CREATED\n"}, nil } } - if len(args) >= 3 && args[0] == "testbox" && args[1] == "stop" { - for i, arg := range args { - if arg == "--id" && i+1 < len(args) { - stopped = append(stopped, args[i+1]) + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "stop" { + for i, arg := range req.Args { + if arg == "--id" && i+1 < len(req.Args) { + stopped = append(stopped, req.Args[i+1]) } } - return exec.Command("sh", "-c", "exit 0") + return LocalCommandResult{}, nil } - return exec.Command("sh", "-c", "printf 'workflow missing\\n'; exit 1") - } + return LocalCommandResult{ExitCode: 1, Stdout: "workflow missing\n"}, errors.New("exit status 1") + }} t.Cleanup(func() { - blacksmithCommandContext = original blacksmithCleanupDelay = originalDelay blacksmithCleanupAttempts = originalAttempts blacksmithCleanupQuiet = originalQuiet @@ -225,8 +234,8 @@ func TestBlacksmithWarmupFailureContinuesAfterFirstDelayedStop(t *testing.T) { cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" cfg.Blacksmith.Job = "check" cfg.Blacksmith.Ref = "main" - app := App{Stdout: io.Discard, Stderr: io.Discard} - _, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false) + backend := newTestBlacksmithBackend(cfg, runner) + _, _, err := backend.warmupLease(context.Background(), Repo{Root: "/repo"}, false) if err == nil { t.Fatal("expected warmup failure") } @@ -240,30 +249,25 @@ func TestBlacksmithOneShotRunRemovesClaimAfterStop(t *testing.T) { 
t.Setenv("HOME", home) t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) t.Setenv("XDG_STATE_HOME", filepath.Join(home, ".local", "state")) - original := blacksmithCommandContext - calls := 0 - blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { - calls++ - if len(args) >= 3 && args[0] == "testbox" && args[1] == "warmup" { - return exec.Command("sh", "-c", "printf 'ready tbx_abc123\\n'") + runner := &blacksmithFuncRunner{fn: func(req LocalCommandRequest) (LocalCommandResult, error) { + if len(req.Args) >= 3 && req.Args[0] == "testbox" && req.Args[1] == "warmup" { + return LocalCommandResult{Stdout: "ready tbx_abc123\n"}, nil } - return exec.Command("sh", "-c", "exit 0") - } - t.Cleanup(func() { - blacksmithCommandContext = original - }) + return LocalCommandResult{}, nil + }} cfg := baseConfig() cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" - app := App{Stdout: io.Discard, Stderr: io.Discard} - err := app.blacksmithRun(context.Background(), cfg, Repo{Root: "/repo"}, blacksmithRunOptions{ + backend := newTestBlacksmithBackend(cfg, runner) + _, err := backend.Run(context.Background(), RunRequest{ + Repo: Repo{Root: "/repo"}, Command: []string{"true"}, }) if err != nil { t.Fatal(err) } - if calls != 4 { - t.Fatalf("blacksmith calls=%d, want list/warmup/run/stop", calls) + if len(runner.calls) != 4 { + t.Fatalf("blacksmith calls=%d, want list/warmup/run/stop", len(runner.calls)) } if claim, err := readLeaseClaim("tbx_abc123"); err != nil { t.Fatal(err) diff --git a/internal/cli/cache.go b/internal/cli/cache.go index 5d1068f..d0aea60 100644 --- a/internal/cli/cache.go +++ b/internal/cli/cache.go @@ -147,7 +147,7 @@ func (a App) cacheTarget(ctx context.Context, id string, reclaim bool) (SSHTarge if claimErr := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, reclaim); claimErr != nil { return SSHTarget{}, Config{}, "", claimErr } - a.touchActiveLeaseBestEffort(ctx, cfg, 
server, leaseID) + a.touchLeaseTargetBestEffort(ctx, cfg, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, "") } return target, cfg, leaseID, err } diff --git a/internal/cli/capabilities.go b/internal/cli/capabilities.go index 09ab401..69ecc97 100644 --- a/internal/cli/capabilities.go +++ b/internal/cli/capabilities.go @@ -33,14 +33,19 @@ func applyCapabilityFlags(cfg *Config, desktop, browser, code bool) { } func validateRequestedCapabilities(cfg Config) error { - if cfg.Desktop && isBlacksmithProvider(cfg.Provider) { - return exit(2, "desktop/VNC is not supported for provider=%s; Blacksmith owns machine connectivity", cfg.Provider) + provider, err := ProviderFor(cfg.Provider) + if err != nil { + return err } - if cfg.Browser && isBlacksmithProvider(cfg.Provider) { - return exit(2, "browser provisioning is not supported for provider=%s; use Blacksmith workflow setup for headless browser automation", cfg.Provider) + spec := provider.Spec() + if cfg.Desktop && !featureSetHas(spec.Features, FeatureDesktop) { + return exit(2, "desktop/VNC is not supported for provider=%s", provider.Name()) } - if cfg.Code && isBlacksmithProvider(cfg.Provider) { - return exit(2, "web code is not supported for provider=%s; Blacksmith owns machine connectivity", cfg.Provider) + if cfg.Browser && !featureSetHas(spec.Features, FeatureBrowser) { + return exit(2, "browser provisioning is not supported for provider=%s", provider.Name()) + } + if cfg.Code && !featureSetHas(spec.Features, FeatureCode) { + return exit(2, "web code is not supported for provider=%s", provider.Name()) } if cfg.Code && cfg.TargetOS != targetLinux { return exit(2, "web code currently supports managed Linux leases only") diff --git a/internal/cli/code.go b/internal/cli/code.go index f7d1ee6..de1298a 100644 --- a/internal/cli/code.go +++ b/internal/cli/code.go @@ -113,7 +113,7 @@ func (a App) webCode(ctx context.Context, args []string) error { if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), 
cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { return err } - a.touchActiveLeaseBestEffort(ctx, cfg, server, leaseID) + a.touchLeaseTargetBestEffort(ctx, cfg, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, "") workspace, folder, hydratedByActions := codeWorkspace(ctx, target, cfg, leaseID, repo) if hydratedByActions { fmt.Fprintf(a.Stderr, "using GitHub Actions workspace %s\n", workspace) diff --git a/internal/cli/desktop.go b/internal/cli/desktop.go index 0df97f6..5601313 100644 --- a/internal/cli/desktop.go +++ b/internal/cli/desktop.go @@ -63,7 +63,7 @@ func (a App) desktopLaunch(ctx context.Context, args []string) error { if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { return err } - a.touchActiveLeaseBestEffort(ctx, cfg, server, leaseID) + a.touchLeaseTargetBestEffort(ctx, cfg, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, "") if err := waitForLoopbackVNC(ctx, &target); err != nil { return err } diff --git a/internal/cli/flags.go b/internal/cli/flags.go index 05b93d7..eefa809 100644 --- a/internal/cli/flags.go +++ b/internal/cli/flags.go @@ -12,6 +12,10 @@ func flagWasSet(fs *flag.FlagSet, name string) bool { return seen } +func FlagWasSet(fs *flag.FlagSet, name string) bool { + return flagWasSet(fs, name) +} + func extractBoolFlag(args []string, name string) ([]string, bool) { want := "--" + name out := make([]string, 0, len(args)) diff --git a/internal/cli/lease_flags.go b/internal/cli/lease_flags.go index 7b0f8ae..7b11e52 100644 --- a/internal/cli/lease_flags.go +++ b/internal/cli/lease_flags.go @@ -8,36 +8,36 @@ import ( ) type leaseCreateFlagValues struct { - Provider *string - Profile *string - Class *string - ServerType *string - Market *string - TTL *time.Duration - Idle *time.Duration - Desktop *bool - Browser *bool - Code *bool - Blacksmith blacksmithFlagValues - Target targetFlagValues - Network networkFlagValues + Provider *string + Profile 
*string + Class *string + ServerType *string + Market *string + TTL *time.Duration + Idle *time.Duration + Desktop *bool + Browser *bool + Code *bool + ProviderFlags providerFlagValues + Target targetFlagValues + Network networkFlagValues } func registerLeaseCreateFlags(fs *flag.FlagSet, defaults Config) leaseCreateFlagValues { return leaseCreateFlagValues{ - Provider: fs.String("provider", defaults.Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox"), - Profile: fs.String("profile", defaults.Profile, "profile"), - Class: fs.String("class", defaults.Class, "machine class"), - ServerType: fs.String("type", getenv("CRABBOX_SERVER_TYPE", ""), "provider server/instance type"), - Market: fs.String("market", defaults.Capacity.Market, "capacity market: spot or on-demand"), - TTL: fs.Duration("ttl", defaults.TTL, "maximum lease lifetime"), - Idle: fs.Duration("idle-timeout", defaults.IdleTimeout, "idle timeout"), - Desktop: fs.Bool("desktop", defaults.Desktop, "provision or require a visible desktop/VNC session"), - Browser: fs.Bool("browser", defaults.Browser, "provision or require a browser binary"), - Code: fs.Bool("code", defaults.Code, "provision or require web code-server capability"), - Blacksmith: registerBlacksmithFlags(fs, defaults), - Target: registerTargetFlags(fs, defaults), - Network: registerNetworkFlags(fs, defaults), + Provider: fs.String("provider", defaults.Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox"), + Profile: fs.String("profile", defaults.Profile, "profile"), + Class: fs.String("class", defaults.Class, "machine class"), + ServerType: fs.String("type", getenv("CRABBOX_SERVER_TYPE", ""), "provider server/instance type"), + Market: fs.String("market", defaults.Capacity.Market, "capacity market: spot or on-demand"), + TTL: fs.Duration("ttl", defaults.TTL, "maximum lease lifetime"), + Idle: fs.Duration("idle-timeout", defaults.IdleTimeout, "idle timeout"), + Desktop: fs.Bool("desktop", defaults.Desktop, "provision or require 
a visible desktop/VNC session"), + Browser: fs.Bool("browser", defaults.Browser, "provision or require a browser binary"), + Code: fs.Bool("code", defaults.Code, "provision or require web code-server capability"), + ProviderFlags: registerProviderFlags(fs, defaults), + Target: registerTargetFlags(fs, defaults), + Network: registerNetworkFlags(fs, defaults), } } @@ -62,7 +62,9 @@ func applyLeaseCreateFlags(cfg *Config, fs *flag.FlagSet, values leaseCreateFlag if flagWasSet(fs, "idle-timeout") { cfg.IdleTimeout = *values.Idle } - applyBlacksmithFlagOverrides(cfg, fs, values.Blacksmith) + if err := applyProviderFlags(cfg, fs, values.ProviderFlags); err != nil { + return err + } if err := validateProviderTarget(*cfg); err != nil { return err } @@ -141,6 +143,6 @@ func (a App) claimAndTouchLeaseTarget(ctx context.Context, cfg Config, server Se if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, reclaim); err != nil { return err } - a.touchActiveLeaseBestEffort(ctx, cfg, server, leaseID) + a.touchLeaseTargetBestEffort(ctx, cfg, LeaseTarget{Server: server, LeaseID: leaseID}, "") return nil } diff --git a/internal/cli/pool.go b/internal/cli/pool.go index 6a41106..ecfb365 100644 --- a/internal/cli/pool.go +++ b/internal/cli/pool.go @@ -3,16 +3,17 @@ package cli import ( "context" "encoding/json" - "fmt" "strings" "time" ) func (a App) list(ctx context.Context, args []string) error { + defaults := defaultConfig() fs := newFlagSet("list", a.Stderr) - provider := fs.String("provider", defaultConfig().Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox") + provider := fs.String("provider", defaults.Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox") jsonOut := fs.Bool("json", false, "print JSON") - targetFlags := registerTargetFlags(fs, defaultConfig()) + providerFlags := registerProviderFlags(fs, defaults) + targetFlags := registerTargetFlags(fs, defaults) if err := parseFlags(fs, args); err != nil { return err 
} @@ -21,93 +22,39 @@ func (a App) list(ctx context.Context, args []string) error { return err } cfg.Provider = *provider + if err := applyProviderFlags(&cfg, fs, providerFlags); err != nil { + return err + } if err := applyTargetFlagOverrides(&cfg, fs, targetFlags); err != nil { return err } - if isBlacksmithProvider(cfg.Provider) { - return a.blacksmithList(ctx, cfg, *jsonOut) - } - if isStaticProvider(cfg.Provider) { - server, _, _, err := staticLease(cfg) - if err != nil { - return err - } - servers := []Server{server} - if *jsonOut { - return json.NewEncoder(a.Stdout).Encode(servers) - } - for _, s := range servers { - fmt.Fprintf(a.Stdout, "%-20s %-28s %-12s %-14s %-15s lease=%s slug=%s keep=%s target=%s\n", - s.DisplayID(), s.Name, s.Status, s.ServerType.Name, s.PublicNet.IPv4.IP, s.Labels["lease"], blank(serverSlug(s), "-"), s.Labels["keep"], s.Labels["target"]) - } - return nil - } - if _, ok, err := newCoordinatorClient(cfg); err != nil { - return err - } else if ok { - if cfg.CoordAdminToken == "" { - return exit(2, "pool list requires broker.adminToken or CRABBOX_COORDINATOR_ADMIN_TOKEN when a coordinator is configured") - } - cfg.CoordToken = cfg.CoordAdminToken - coord, _, err := newCoordinatorClient(cfg) - if err != nil { - return err - } - machines, err := coord.Pool(ctx, cfg) - if err != nil { - return err - } - activeLeases, err := coord.AdminLeases(ctx, "active", "", "", 1000) - if err != nil { - fmt.Fprintf(a.Stderr, "warning: active lease lookup failed; orphan status unavailable: %v\n", err) - } - activeLeaseIDs := activeCoordinatorLeaseIDs(activeLeases) - if *jsonOut { - return json.NewEncoder(a.Stdout).Encode(machines) - } - for _, s := range machines { - extra := "" - if err == nil { - extra = coordinatorMachineOrphanField(s.Labels, activeLeaseIDs) - } - fmt.Fprintf(a.Stdout, "%-20s %-28s %-12s %-14s %-15s lease=%s slug=%s keep=%s%s\n", - s.ID, s.Name, s.Status, s.ServerType, s.Host, s.Labels["lease"], blank(s.Labels["slug"], "-"), 
s.Labels["keep"], extra) - } - return nil - } - if cfg.Provider == "aws" { - client, err := newAWSClient(ctx, cfg) - if err != nil { - return err - } - servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return err - } - if *jsonOut { - return json.NewEncoder(a.Stdout).Encode(servers) - } - for _, s := range servers { - fmt.Fprintf(a.Stdout, "%-20s %-28s %-12s %-14s %-15s lease=%s slug=%s keep=%s\n", - s.DisplayID(), s.Name, s.Status, s.ServerType.Name, s.PublicNet.IPv4.IP, s.Labels["lease"], blank(serverSlug(s), "-"), s.Labels["keep"]) - } - return nil - } - client, err := newHetznerClient() + backend, err := loadBackend(cfg, runtimeForApp(a)) if err != nil { return err } - servers, err := client.ListCrabboxServers(ctx) + var servers []Server + switch b := backend.(type) { + case SSHLeaseBackend: + servers, err = b.List(ctx, ListRequest{Options: leaseOptionsFromConfig(cfg)}) + case DelegatedRunBackend: + servers, err = b.List(ctx, ListRequest{Options: leaseOptionsFromConfig(cfg)}) + default: + return exit(2, "provider=%s does not support list", backend.Spec().Name) + } if err != nil { return err } if *jsonOut { + if jsonBackend, ok := backend.(JSONListBackend); ok { + view, err := jsonBackend.ListJSON(ctx, ListRequest{Options: leaseOptionsFromConfig(cfg)}) + if err != nil { + return err + } + return json.NewEncoder(a.Stdout).Encode(view) + } return json.NewEncoder(a.Stdout).Encode(servers) } - for _, s := range servers { - fmt.Fprintf(a.Stdout, "%-20s %-28s %-12s %-14s %-15s lease=%s slug=%s keep=%s\n", - s.DisplayID(), s.Name, s.Status, s.ServerType.Name, s.PublicNet.IPv4.IP, s.Labels["lease"], blank(serverSlug(s), "-"), s.Labels["keep"]) - } + renderServerList(a.Stdout, servers) return nil } @@ -133,10 +80,12 @@ func coordinatorMachineOrphanField(labels map[string]string, activeLeaseIDs map[ } func (a App) cleanup(ctx context.Context, args []string) error { + defaults := defaultConfig() fs := newFlagSet("machine cleanup", a.Stderr) - provider := 
fs.String("provider", defaultConfig().Provider, "provider: hetzner or aws") + provider := fs.String("provider", defaults.Provider, "provider: hetzner or aws") dryRun := fs.Bool("dry-run", false, "only print") - targetFlags := registerTargetFlags(fs, defaultConfig()) + providerFlags := registerProviderFlags(fs, defaults) + targetFlags := registerTargetFlags(fs, defaults) if err := parseFlags(fs, args); err != nil { return err } @@ -145,63 +94,24 @@ func (a App) cleanup(ctx context.Context, args []string) error { return err } cfg.Provider = *provider + if err := applyProviderFlags(&cfg, fs, providerFlags); err != nil { + return err + } if err := applyTargetFlagOverrides(&cfg, fs, targetFlags); err != nil { return err } - if isStaticProvider(cfg.Provider) { - return exit(2, "machine cleanup is not supported for provider=%s", cfg.Provider) - } - if _, ok, err := newCoordinatorClient(cfg); err != nil { + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { return err - } else if ok { + } + if backendCoordinator(backend) != nil { return exit(2, "machine cleanup is disabled when a coordinator is configured; coordinator TTL alarms own brokered cleanup") } - if cfg.Provider == "aws" { - awsClient, err := newAWSClient(ctx, cfg) - if err != nil { - return err - } - servers, err := awsClient.ListCrabboxServers(ctx) - if err != nil { - return err - } - for _, s := range servers { - shouldDelete, reason := shouldCleanupServer(s, time.Now().UTC()) - if !shouldDelete { - fmt.Fprintf(a.Stderr, "skip server id=%s name=%s reason=%s\n", s.DisplayID(), s.Name, reason) - continue - } - fmt.Fprintf(a.Stderr, "delete server id=%s name=%s\n", s.DisplayID(), s.Name) - if !*dryRun { - if err := deleteServer(ctx, cfg, s); err != nil { - return err - } - } - } - return nil + cleaner, ok := backend.(CleanupBackend) + if !ok { + return exit(2, "machine cleanup is not supported for provider=%s", cfg.Provider) } - client, err := newHetznerClient() - if err != nil { - return err - } 
- servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return err - } - for _, s := range servers { - shouldDelete, reason := shouldCleanupServer(s, time.Now().UTC()) - if !shouldDelete { - fmt.Fprintf(a.Stderr, "skip server id=%s name=%s reason=%s\n", s.DisplayID(), s.Name, reason) - continue - } - fmt.Fprintf(a.Stderr, "delete server id=%s name=%s\n", s.DisplayID(), s.Name) - if !*dryRun { - if err := deleteServer(ctx, cfg, s); err != nil { - return err - } - } - } - return nil + return cleaner.Cleanup(ctx, CleanupRequest{Options: leaseOptionsFromConfig(cfg), DryRun: *dryRun}) } func shouldCleanupServer(server Server, now time.Time) (bool, string) { diff --git a/internal/cli/provider_aws.go b/internal/cli/provider_aws.go new file mode 100644 index 0000000..5fb2c66 --- /dev/null +++ b/internal/cli/provider_aws.go @@ -0,0 +1,119 @@ +package cli + +import ( + "context" + "fmt" + "strings" +) + +type awsLeaseBackend struct{ directSSHBackend } + +func (b *awsLeaseBackend) Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) { + return acquireAttemptsRetry(b.rt, req.Keep, func() (LeaseTarget, error) { + return b.acquireOnce(ctx, req.Keep) + }) +} + +func (b *awsLeaseBackend) acquireOnce(ctx context.Context, keep bool) (LeaseTarget, error) { + if b.cfg.Tailscale.Enabled && b.cfg.Tailscale.AuthKey == "" { + return LeaseTarget{}, exit(2, "direct --tailscale requires %s to contain a Tailscale auth key; brokered mode uses coordinator OAuth secrets", b.cfg.Tailscale.AuthKeyEnv) + } + cfg := chooseAWSRegion(ctx, b.cfg, b.rt.Stderr) + client, err := newAWSClient(ctx, cfg) + if err != nil { + return LeaseTarget{}, err + } + leaseID := newLeaseID() + servers, err := client.ListCrabboxServers(ctx) + if err != nil { + return LeaseTarget{}, err + } + slug := allocateDirectLeaseSlug(leaseID, servers) + keyPath, publicKey, err := ensureTestboxKeyForConfig(cfg, leaseID) + if err != nil { + return LeaseTarget{}, err + } + cfg.SSHKey = keyPath + 
cfg.ProviderKey = providerKeyForLease(leaseID) + ensureAWSSSHCIDRs(ctx, &cfg) + fmt.Fprintf(b.rt.Stderr, "provisioning provider=aws lease=%s slug=%s class=%s preferred_type=%s region=%s keep=%v market=%s strategy=%s\n", leaseID, slug, cfg.Class, cfg.ServerType, cfg.AWSRegion, keep, cfg.Capacity.Market, cfg.Capacity.Strategy) + server, cfg, err := client.CreateServerWithFallback(ctx, cfg, publicKey, leaseID, slug, keep, func(format string, args ...any) { + fmt.Fprintf(b.rt.Stderr, format, args...) + }) + if err != nil { + return LeaseTarget{}, err + } + fmt.Fprintf(b.rt.Stderr, "provisioned lease=%s server=%s type=%s\n", leaseID, server.DisplayID(), cfg.ServerType) + server, err = client.waitForServerIP(ctx, server.CloudID) + if err != nil { + return LeaseTarget{}, err + } + target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) + if err := bootstrapAWSWindowsDesktop(ctx, cfg, &target, publicKey, b.rt.Stderr); err != nil { + _ = client.DeleteServer(context.Background(), server.CloudID) + return LeaseTarget{}, err + } + server.Labels["state"] = "ready" + if err := client.SetTags(ctx, server.CloudID, server.Labels); err != nil { + fmt.Fprintf(b.rt.Stderr, "warning: set tags: %v\n", err) + } + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil +} + +func (b *awsLeaseBackend) Resolve(ctx context.Context, req ResolveRequest) (LeaseTarget, error) { + client, err := newAWSClient(ctx, b.cfg) + if err != nil { + return LeaseTarget{}, err + } + if strings.HasPrefix(req.ID, "i-") { + server, err := client.GetServer(ctx, req.ID) + if err != nil { + return LeaseTarget{}, err + } + leaseID := blank(server.Labels["lease"], req.ID) + target := sshTargetFromConfig(b.cfg, server.PublicNet.IPv4.IP) + useStoredTestboxKey(&target, leaseID) + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil + } + servers, err := client.ListCrabboxServers(ctx) + if err != nil { + return LeaseTarget{}, err + } + if server, leaseID, err := 
findServerByAlias(servers, req.ID); err != nil { + return LeaseTarget{}, err + } else if leaseID != "" { + target := sshTargetFromConfig(b.cfg, server.PublicNet.IPv4.IP) + useStoredTestboxKey(&target, leaseID) + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil + } + return LeaseTarget{}, exit(4, "lease/server not found: %s", req.ID) +} + +func (b *awsLeaseBackend) List(ctx context.Context, req ListRequest) ([]LeaseView, error) { + _ = req + client, err := newAWSClient(ctx, b.cfg) + if err != nil { + return nil, err + } + return client.ListCrabboxServers(ctx) +} + +func (b *awsLeaseBackend) ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error { + if err := deleteServer(ctx, b.cfg, req.Lease.Server); err != nil { + return err + } + removeLeaseClaim(req.Lease.LeaseID) + return nil +} + +func (b *awsLeaseBackend) Touch(ctx context.Context, req TouchRequest) (Server, error) { + return b.touch(ctx, req.Lease.Server, req.State), nil +} + +func (b *awsLeaseBackend) Cleanup(ctx context.Context, req CleanupRequest) error { + servers, err := b.List(ctx, ListRequest{Options: req.Options}) + if err != nil { + return err + } + return b.cleanupServers(ctx, req, servers) +} diff --git a/internal/cli/provider_backend.go b/internal/cli/provider_backend.go new file mode 100644 index 0000000..c038068 --- /dev/null +++ b/internal/cli/provider_backend.go @@ -0,0 +1,461 @@ +package cli + +import ( + "bytes" + "context" + "flag" + "fmt" + "io" + "net/http" + "os/exec" + "sort" + "strings" + "time" +) + +type Provider interface { + Name() string + Aliases() []string + Spec() ProviderSpec + RegisterFlags(fs *flag.FlagSet, defaults Config) any + ApplyFlags(cfg *Config, fs *flag.FlagSet, values any) error + Configure(cfg Config, rt Runtime) (Backend, error) +} + +type Backend interface { + Spec() ProviderSpec +} + +type SSHLeaseBackend interface { + Backend + Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) + Resolve(ctx context.Context, 
req ResolveRequest) (LeaseTarget, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error + Touch(ctx context.Context, req TouchRequest) (Server, error) +} + +type DelegatedRunBackend interface { + Backend + Warmup(ctx context.Context, req WarmupRequest) error + Run(ctx context.Context, req RunRequest) (RunResult, error) + List(ctx context.Context, req ListRequest) ([]LeaseView, error) + Status(ctx context.Context, req StatusRequest) (statusView, error) + Stop(ctx context.Context, req StopRequest) error +} + +type CleanupBackend interface { + Backend + Cleanup(ctx context.Context, req CleanupRequest) error +} + +type JSONListBackend interface { + Backend + ListJSON(ctx context.Context, req ListRequest) (any, error) +} + +type ProviderSpec struct { + Name string + Kind ProviderKind + Targets []TargetSpec + Features FeatureSet + Coordinator CoordinatorMode +} + +type ProviderKind string + +const ( + ProviderKindSSHLease ProviderKind = "ssh-lease" + ProviderKindDelegatedRun ProviderKind = "delegated-run" +) + +type CoordinatorMode string + +const ( + CoordinatorNever CoordinatorMode = "never" + CoordinatorSupported CoordinatorMode = "supported" +) + +type TargetSpec struct { + OS string + WindowsMode string +} + +type Feature string + +const ( + FeatureSSH Feature = "ssh" + FeatureCrabboxSync Feature = "crabbox-sync" + FeatureCleanup Feature = "cleanup" + FeatureDesktop Feature = "desktop" + FeatureBrowser Feature = "browser" + FeatureCode Feature = "code" + FeatureTailscale Feature = "tailscale" +) + +type FeatureSet []Feature + +type Runtime struct { + Stdout io.Writer + Stderr io.Writer + Clock Clock + HTTP *http.Client + Exec CommandRunner +} + +type Clock interface { + Now() time.Time +} + +type realClock struct{} + +func (realClock) Now() time.Time { return time.Now() } + +type CommandRunner interface { + Run(ctx context.Context, req LocalCommandRequest) (LocalCommandResult, 
error) +} + +type LocalCommandRequest struct { + Name string + Args []string + Env []string + Dir string + Stdout io.Writer + Stderr io.Writer +} + +type LocalCommandResult struct { + ExitCode int + Stdout string + Stderr string +} + +type execCommandRunner struct{} + +func (execCommandRunner) Run(ctx context.Context, req LocalCommandRequest) (LocalCommandResult, error) { + cmd := exec.CommandContext(ctx, req.Name, req.Args...) + cmd.Env = req.Env + cmd.Dir = req.Dir + var stdout bytes.Buffer + var stderr bytes.Buffer + if req.Stdout != nil { + cmd.Stdout = io.MultiWriter(req.Stdout, &stdout) + } else { + cmd.Stdout = &stdout + } + if req.Stderr != nil { + cmd.Stderr = io.MultiWriter(req.Stderr, &stderr) + } else { + cmd.Stderr = &stderr + } + err := cmd.Run() + result := LocalCommandResult{ExitCode: exitCode(err), Stdout: stdout.String(), Stderr: stderr.String()} + if err == nil { + result.ExitCode = 0 + } + return result, err +} + +type LeaseOptions struct { + TargetOS string + WindowsMode string + Class string + ServerType string + IdleTimeout time.Duration + TTL time.Duration + Desktop bool + Browser bool + Code bool + Tailscale TailscaleConfig + WorkRoot string + SSHUser string + SSHPort string + SSHKey string + Sync SyncConfig + Results ResultsConfig + EnvAllow []string + ActionsRunner bool +} + +type AcquireRequest struct { + Repo Repo + Options LeaseOptions + Keep bool + Reclaim bool +} + +type ResolveRequest struct { + Repo Repo + Options LeaseOptions + ID string + Reclaim bool +} + +type ReleaseLeaseRequest struct { + Lease LeaseTarget + Force bool +} + +type TouchRequest struct { + Lease LeaseTarget + State string + IdleTimeout time.Duration +} + +type ListRequest struct { + Options LeaseOptions +} + +type RunRequest struct { + Repo Repo + ID string + Options LeaseOptions + Keep bool + Reclaim bool + NoSync bool + SyncOnly bool + DebugSync bool + ShellMode bool + ChecksumSync bool + ForceSyncLarge bool + Command []string + TimingJSON bool +} + +type 
WarmupRequest struct { + Repo Repo + Options LeaseOptions + Keep bool + Reclaim bool + ActionsRunner bool + TimingJSON bool +} + +type StatusRequest struct { + Options LeaseOptions + ID string + Wait bool + WaitTimeout time.Duration +} + +type StopRequest struct { + Options LeaseOptions + ID string +} + +type CleanupRequest struct { + Options LeaseOptions + DryRun bool +} + +type RunResult struct { + ExitCode int + Command time.Duration + Total time.Duration + SyncDelegated bool +} + +type LeaseTarget struct { + Server Server + SSH SSHTarget + LeaseID string + Coordinator *CoordinatorClient +} + +type LeaseView = Server + +var providerRegistry = map[string]Provider{} + +func RegisterProvider(provider Provider) { + names := append([]string{provider.Name()}, provider.Aliases()...) + for _, name := range names { + key := normalizeProviderName(name) + if key == "" { + panic("provider name is empty") + } + if providerRegistry[key] != nil { + panic("provider already registered: " + key) + } + providerRegistry[key] = provider + } +} + +func ProviderFor(name string) (Provider, error) { + provider := providerRegistry[normalizeProviderName(name)] + if provider == nil { + return nil, exit(2, "unknown provider %q", name) + } + return provider, nil +} + +func registeredProviders() []Provider { + seen := map[string]struct{}{} + providers := make([]Provider, 0, len(providerRegistry)) + for _, provider := range providerRegistry { + name := normalizeProviderName(provider.Name()) + if _, ok := seen[name]; ok { + continue + } + seen[name] = struct{}{} + providers = append(providers, provider) + } + sort.Slice(providers, func(i, j int) bool { + return providers[i].Name() < providers[j].Name() + }) + return providers +} + +func normalizeProviderName(name string) string { + return strings.ToLower(strings.TrimSpace(name)) +} + +type providerFlagValues map[string]any + +func registerProviderFlags(fs *flag.FlagSet, defaults Config) providerFlagValues { + values := providerFlagValues{} + 
for _, provider := range registeredProviders() { + values[provider.Name()] = provider.RegisterFlags(fs, defaults) + } + return values +} + +func applyProviderFlags(cfg *Config, fs *flag.FlagSet, values providerFlagValues) error { + provider, err := ProviderFor(cfg.Provider) + if err != nil { + return err + } + return provider.ApplyFlags(cfg, fs, values[provider.Name()]) +} + +func runtimeForApp(a App) Runtime { + return Runtime{Stdout: a.Stdout, Stderr: a.Stderr, Clock: realClock{}, Exec: execCommandRunner{}} +} + +func loadBackend(cfg Config, rt Runtime) (Backend, error) { + if rt.Stdout == nil { + rt.Stdout = io.Discard + } + if rt.Stderr == nil { + rt.Stderr = io.Discard + } + if rt.Clock == nil { + rt.Clock = realClock{} + } + if rt.Exec == nil { + rt.Exec = execCommandRunner{} + } + provider, err := ProviderFor(cfg.Provider) + if err != nil { + return nil, err + } + backend, err := provider.Configure(cfg, rt) + if err != nil { + return nil, err + } + if ssh, ok := backend.(SSHLeaseBackend); ok && shouldUseCoordinator(cfg, provider.Spec()) { + coord, _, err := newCoordinatorClient(cfg) + if err != nil { + return nil, err + } + return &coordinatorLeaseBackend{spec: provider.Spec(), cfg: cfg, direct: ssh, coord: coord, rt: rt}, nil + } + return backend, nil +} + +func shouldUseCoordinator(cfg Config, spec ProviderSpec) bool { + return spec.Coordinator == CoordinatorSupported && strings.TrimSpace(cfg.Coordinator) != "" +} + +func backendCoordinator(backend Backend) *CoordinatorClient { + if b, ok := backend.(*coordinatorLeaseBackend); ok { + return b.coord + } + return nil +} + +func leaseOptionsFromConfig(cfg Config) LeaseOptions { + return LeaseOptions{ + TargetOS: cfg.TargetOS, + WindowsMode: cfg.WindowsMode, + Class: cfg.Class, + ServerType: cfg.ServerType, + IdleTimeout: cfg.IdleTimeout, + TTL: cfg.TTL, + Desktop: cfg.Desktop, + Browser: cfg.Browser, + Code: cfg.Code, + Tailscale: cfg.Tailscale, + WorkRoot: cfg.WorkRoot, + SSHUser: cfg.SSHUser, + SSHPort: 
cfg.SSHPort, + SSHKey: cfg.SSHKey, + Sync: cfg.Sync, + Results: cfg.Results, + EnvAllow: cfg.EnvAllow, + } +} + +func validateActionsRunnerCapability(backend Backend, cfg Config) error { + if _, ok := backend.(SSHLeaseBackend); !ok { + return exit(2, "--actions-runner requires an SSH lease provider") + } + if cfg.TargetOS != targetLinux { + return exit(2, "--actions-runner requires target=linux") + } + return nil +} + +func featureSetHas(features FeatureSet, feature Feature) bool { + for _, candidate := range features { + if candidate == feature { + return true + } + } + return false +} + +func rejectDelegatedSyncOptions(provider string, req RunRequest) error { + if req.SyncOnly { + return exit(2, "%s delegates sync; --sync-only is not supported", provider) + } + if req.ChecksumSync { + return exit(2, "%s delegates sync; --checksum is not supported", provider) + } + if req.ForceSyncLarge { + return exit(2, "%s delegates sync; --force-sync-large is not supported", provider) + } + return nil +} + +func renderServerList(stdout io.Writer, servers []Server) { + for _, s := range servers { + extra := "" + if orphan := strings.TrimSpace(s.Labels["orphan"]); orphan != "" { + extra = " " + orphan + } + fmt.Fprintf(stdout, "%-20s %-28s %-12s %-14s %-15s lease=%s slug=%s keep=%s target=%s%s\n", + s.DisplayID(), s.Name, s.Status, s.ServerType.Name, s.PublicNet.IPv4.IP, s.Labels["lease"], blank(serverSlug(s), "-"), s.Labels["keep"], s.Labels["target"], extra) + } +} + +func (a App) touchLeaseTargetBestEffort(ctx context.Context, cfg Config, lease LeaseTarget, state string) Server { + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { + fmt.Fprintf(a.Stderr, "warning: touch failed for %s: %v\n", lease.LeaseID, err) + return lease.Server + } + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + fmt.Fprintf(a.Stderr, "warning: provider=%s does not support lease touch\n", backend.Spec().Name) + return lease.Server + } + if state == "" { + state = 
blank(lease.Server.Labels["state"], "ready") + } + server, err := sshBackend.Touch(ctx, TouchRequest{Lease: lease, State: state, IdleTimeout: cfg.IdleTimeout}) + if err != nil { + fmt.Fprintf(a.Stderr, "warning: touch failed for %s: %v\n", lease.LeaseID, err) + return lease.Server + } + return server +} diff --git a/internal/cli/provider_backend_test.go b/internal/cli/provider_backend_test.go new file mode 100644 index 0000000..48079d8 --- /dev/null +++ b/internal/cli/provider_backend_test.go @@ -0,0 +1,171 @@ +package cli + +import ( + "context" + "io" + "testing" +) + +type recordingCommandRunner struct { + calls []LocalCommandRequest + result LocalCommandResult + err error +} + +func (r *recordingCommandRunner) Run(_ context.Context, req LocalCommandRequest) (LocalCommandResult, error) { + r.calls = append(r.calls, req) + return r.result, r.err +} + +func testRuntimeWithRunner(r CommandRunner) Runtime { + return Runtime{Stdout: io.Discard, Stderr: io.Discard, Clock: realClock{}, Exec: r} +} + +func TestProviderRegistryCanonicalAndAliases(t *testing.T) { + for _, name := range []string{"hetzner", "aws", "ssh", "static", "static-ssh", "blacksmith", "blacksmith-testbox"} { + if _, err := ProviderFor(name); err != nil { + t.Fatalf("ProviderFor(%q): %v", name, err) + } + } + if _, err := ProviderFor("missing"); err == nil { + t.Fatal("expected missing provider to fail") + } +} + +func TestLoadBackendWrapsCoordinatorOnlyForSupportedSSHProviders(t *testing.T) { + cfg := baseConfig() + cfg.Provider = "aws" + cfg.Coordinator = "https://coordinator.example" + backend, err := loadBackend(cfg, testRuntimeWithRunner(&recordingCommandRunner{})) + if err != nil { + t.Fatalf("load aws coordinator backend: %v", err) + } + if _, ok := backend.(*coordinatorLeaseBackend); !ok { + t.Fatalf("backend=%T, want coordinatorLeaseBackend", backend) + } + + cfg.Provider = "ssh" + backend, err = loadBackend(cfg, testRuntimeWithRunner(&recordingCommandRunner{})) + if err != nil { + 
t.Fatalf("load static ssh backend: %v", err) + } + if _, ok := backend.(*coordinatorLeaseBackend); ok { + t.Fatalf("static ssh unexpectedly used coordinator wrapper") + } + + cfg.Provider = "blacksmith-testbox" + backend, err = loadBackend(cfg, testRuntimeWithRunner(&recordingCommandRunner{})) + if err != nil { + t.Fatalf("load blacksmith backend: %v", err) + } + if _, ok := backend.(DelegatedRunBackend); !ok { + t.Fatalf("backend=%T, want delegated run backend", backend) + } +} + +func TestLeaseCreateFlagsApplySelectedProviderFlags(t *testing.T) { + defaults := baseConfig() + fs := newFlagSet("test", io.Discard) + values := registerLeaseCreateFlags(fs, defaults) + if err := parseFlags(fs, []string{ + "--provider", "blacksmith-testbox", + "--blacksmith-org", "openclaw", + "--blacksmith-workflow", ".github/workflows/testbox.yml", + "--blacksmith-job", "test", + "--blacksmith-ref", "feature", + }); err != nil { + t.Fatal(err) + } + cfg := baseConfig() + if err := applyLeaseCreateFlags(&cfg, fs, values); err != nil { + t.Fatal(err) + } + if cfg.Blacksmith.Org != "openclaw" || cfg.Blacksmith.Workflow != ".github/workflows/testbox.yml" || cfg.Blacksmith.Job != "test" || cfg.Blacksmith.Ref != "feature" { + t.Fatalf("blacksmith flags not applied through provider registry: %#v", cfg.Blacksmith) + } +} + +func TestValidateRequestedCapabilitiesUsesProviderSpec(t *testing.T) { + cfg := baseConfig() + cfg.Provider = "blacksmith-testbox" + cfg.Desktop = true + if err := validateRequestedCapabilities(cfg); err == nil { + t.Fatal("expected blacksmith desktop capability rejection") + } + + cfg = baseConfig() + cfg.Provider = "hetzner" + cfg.Desktop = true + if err := validateRequestedCapabilities(cfg); err != nil { + t.Fatalf("hetzner desktop capability rejected: %v", err) + } +} + +func TestBlacksmithBackendUsesInjectedCommandRunnerForListAndStatus(t *testing.T) { + runner := &recordingCommandRunner{ + result: LocalCommandResult{ + Stdout: "tbx_123 ready openclaw 
.github/workflows/testbox.yml test main 2026-05-06T00:00:00Z\n", + }, + } + cfg := baseConfig() + cfg.Provider = "blacksmith-testbox" + cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" + cfg.Blacksmith.Job = "test" + cfg.Blacksmith.Ref = "main" + backend, err := loadBackend(cfg, testRuntimeWithRunner(runner)) + if err != nil { + t.Fatalf("load blacksmith backend: %v", err) + } + delegated := backend.(DelegatedRunBackend) + servers, err := delegated.List(context.Background(), ListRequest{Options: leaseOptionsFromConfig(cfg)}) + if err != nil { + t.Fatalf("list: %v", err) + } + if len(servers) != 1 || servers[0].CloudID != "tbx_123" { + t.Fatalf("servers=%#v", servers) + } + state, err := delegated.Status(context.Background(), StatusRequest{Options: leaseOptionsFromConfig(cfg), ID: "tbx_123"}) + if err != nil { + t.Fatalf("status: %v", err) + } + if !state.Ready || state.ID != "tbx_123" { + t.Fatalf("state=%#v", state) + } + if len(runner.calls) != 2 { + t.Fatalf("runner calls=%d, want 2", len(runner.calls)) + } + for _, call := range runner.calls { + if call.Name != "blacksmith" { + t.Fatalf("command name=%q", call.Name) + } + } +} + +func TestBlacksmithBackendListJSONKeepsParsedTableShape(t *testing.T) { + runner := &recordingCommandRunner{ + result: LocalCommandResult{ + Stdout: "tbx_123 ready openclaw .github/workflows/testbox.yml test main 2026-05-06T00:00:00Z\n", + }, + } + cfg := baseConfig() + cfg.Provider = "blacksmith-testbox" + backend, err := loadBackend(cfg, testRuntimeWithRunner(runner)) + if err != nil { + t.Fatalf("load blacksmith backend: %v", err) + } + jsonBackend, ok := backend.(JSONListBackend) + if !ok { + t.Fatalf("backend=%T, want JSONListBackend", backend) + } + view, err := jsonBackend.ListJSON(context.Background(), ListRequest{Options: leaseOptionsFromConfig(cfg)}) + if err != nil { + t.Fatalf("list json: %v", err) + } + items, ok := view.([]blacksmithListItem) + if !ok { + t.Fatalf("view=%T, want []blacksmithListItem", view) + } + 
if len(items) != 1 || items[0].ID != "tbx_123" || items[0].Repo != "openclaw" { + t.Fatalf("items=%#v", items) + } +} diff --git a/internal/cli/provider_blacksmith.go b/internal/cli/provider_blacksmith.go new file mode 100644 index 0000000..0caaaf6 --- /dev/null +++ b/internal/cli/provider_blacksmith.go @@ -0,0 +1,357 @@ +package cli + +import ( + "context" + "flag" + "fmt" + "io" + "strings" + "time" +) + +func RegisterBlacksmithProviderFlags(fs *flag.FlagSet, defaults Config) any { + return registerBlacksmithFlags(fs, defaults) +} + +func ApplyBlacksmithProviderFlags(cfg *Config, fs *flag.FlagSet, values any) error { + if v, ok := values.(blacksmithFlagValues); ok { + applyBlacksmithFlagOverrides(cfg, fs, v) + } + return nil +} + +func NewBlacksmithBackend(spec ProviderSpec, cfg Config, rt Runtime) Backend { + cfg.Provider = blacksmithTestboxProvider + return &blacksmithBackend{spec: spec, cfg: cfg, rt: rt} +} + +type blacksmithBackend struct { + spec ProviderSpec + cfg Config + rt Runtime +} + +func (b *blacksmithBackend) Spec() ProviderSpec { return b.spec } + +func (b *blacksmithBackend) Warmup(ctx context.Context, req WarmupRequest) error { + if req.ActionsRunner { + return exit(2, "--actions-runner is not supported for provider=%s; Blacksmith owns runner hydration", b.cfg.Provider) + } + started := b.rt.Clock.Now() + leaseID, slug, err := b.warmupLease(ctx, req.Repo, req.Reclaim) + if err != nil { + return err + } + fmt.Fprintf(b.rt.Stdout, "leased %s slug=%s provider=%s idle_timeout=%s\n", leaseID, slug, blacksmithTestboxProvider, blacksmithIdleTimeout(b.cfg)) + if !req.Keep { + fmt.Fprintf(b.rt.Stderr, "warning: blacksmith warmup keeps the testbox until idle timeout or explicit stop\n") + } + fmt.Fprintf(b.rt.Stdout, "warmup complete total=%s\n", b.rt.Clock.Now().Sub(started).Round(time.Millisecond)) + if req.TimingJSON { + total := b.rt.Clock.Now().Sub(started) + if err := writeTimingJSON(b.rt.Stderr, timingReport{ + Provider: blacksmithTestboxProvider, + 
LeaseID: leaseID, + Slug: slug, + TotalMs: total.Milliseconds(), + ExitCode: 0, + }); err != nil { + return err + } + } + return nil +} + +func (b *blacksmithBackend) Run(ctx context.Context, req RunRequest) (RunResult, error) { + if err := rejectDelegatedSyncOptions(blacksmithTestboxProvider, req); err != nil { + return RunResult{}, err + } + started := b.rt.Clock.Now() + leaseID := req.ID + acquired := false + var err error + if leaseID == "" { + leaseID, _, err = b.warmupLease(ctx, req.Repo, req.Reclaim) + if err != nil { + return RunResult{}, err + } + acquired = true + } else { + leaseID, err = resolveBlacksmithLeaseID(leaseID, req.Repo.Root, req.Reclaim) + if err != nil { + return RunResult{}, err + } + slug, err := blacksmithClaimSlug(req.ID, leaseID) + if err != nil { + return RunResult{}, err + } + if err := claimLeaseForRepoProvider(leaseID, slug, blacksmithTestboxProvider, req.Repo.Root, blacksmithIdleTimeout(b.cfg), req.Reclaim); err != nil { + return RunResult{}, err + } + } + if acquired && !req.Keep { + defer func() { + if err := b.Stop(context.Background(), StopRequest{ID: leaseID}); err != nil { + fmt.Fprintf(b.rt.Stderr, "warning: blacksmith stop failed for %s: %v\n", leaseID, err) + return + } + removeLeaseClaim(leaseID) + removeStoredTestboxKey(leaseID) + }() + } + fmt.Fprintf(b.rt.Stderr, "provider=blacksmith-testbox id=%s sync=delegated auth=blacksmith\n", leaseID) + commandStart := b.rt.Clock.Now() + code := b.runTestbox(ctx, leaseID, req.Command, req.DebugSync, req.ShellMode) + commandDuration := b.rt.Clock.Now().Sub(commandStart) + total := b.rt.Clock.Now().Sub(started) + fmt.Fprintf(b.rt.Stderr, "blacksmith run summary sync=delegated command=%s total=%s exit=%d\n", commandDuration.Round(time.Millisecond), total.Round(time.Millisecond), code) + if req.TimingJSON { + if err := writeTimingJSON(b.rt.Stderr, timingReport{ + Provider: blacksmithTestboxProvider, + LeaseID: leaseID, + SyncPhases: []timingPhase{{Name: "delegated", Skipped: true, 
Reason: "blacksmith-testbox owns sync"}}, + SyncDelegated: true, + CommandMs: commandDuration.Milliseconds(), + TotalMs: total.Milliseconds(), + ExitCode: code, + }); err != nil { + return RunResult{}, err + } + } + result := RunResult{ExitCode: code, Command: commandDuration, Total: total, SyncDelegated: true} + if code != 0 { + return result, ExitError{Code: code, Message: fmt.Sprintf("blacksmith testbox run exited %d", code)} + } + return result, nil +} + +func (b *blacksmithBackend) List(ctx context.Context, req ListRequest) ([]Server, error) { + _ = req + out, err := b.commandOutput(ctx, blacksmithListArgs(b.cfg)) + if err != nil { + return nil, err + } + items := parseBlacksmithList(out) + servers := make([]Server, 0, len(items)) + for _, item := range items { + servers = append(servers, blacksmithItemToServer(item)) + } + return servers, nil +} + +func (b *blacksmithBackend) ListJSON(ctx context.Context, req ListRequest) (any, error) { + _ = req + out, err := b.commandOutput(ctx, blacksmithListArgs(b.cfg)) + if err != nil { + return nil, err + } + return parseBlacksmithList(out), nil +} + +func (b *blacksmithBackend) Status(ctx context.Context, req StatusRequest) (statusView, error) { + leaseID, err := resolveBlacksmithLeaseID(req.ID, "", false) + if err != nil { + return statusView{}, err + } + deadline := b.rt.Clock.Now().Add(req.WaitTimeout) + for { + state, err := b.blacksmithStatusView(ctx, leaseID) + if err != nil { + return statusView{}, err + } + if !req.Wait || state.Ready { + return state, nil + } + if b.rt.Clock.Now().After(deadline) { + return statusView{}, exit(5, "timed out waiting for %s to become ready", req.ID) + } + time.Sleep(5 * time.Second) + } +} + +func (b *blacksmithBackend) Stop(ctx context.Context, req StopRequest) error { + leaseID, err := resolveBlacksmithLeaseID(req.ID, "", false) + if err != nil { + return err + } + if _, err := b.runCommand(ctx, blacksmithStopArgs(b.cfg, leaseID), b.rt.Stdout, b.rt.Stderr); err != nil { + 
// warmupLease runs the blacksmith warmup command and claims the resulting
// testbox for repo.Root. It returns the parsed lease ID and a freshly
// generated slug.
//
// The SSH key is first created under a temporary "tbx_pending_" ID and moved
// to the real lease ID once the command output has been parsed. cleanupKeyID
// always names the key the deferred function should remove on return; it is
// cleared only on full success so the key survives.
func (b *blacksmithBackend) warmupLease(ctx context.Context, repo Repo, reclaim bool) (string, string, error) {
	pendingID := "tbx_pending_" + strings.TrimPrefix(newLeaseID(), "cbx_")
	cleanupKeyID := pendingID
	defer func() {
		// Remove whichever key is still marked for cleanup (pending or
		// renamed); skipped when cleanupKeyID was cleared on success.
		if cleanupKeyID != "" {
			removeStoredTestboxKey(cleanupKeyID)
		}
	}()
	_, publicKey, err := ensureTestboxKey(pendingID)
	if err != nil {
		return "", "", err
	}
	args, err := blacksmithWarmupArgs(b.cfg, publicKey)
	if err != nil {
		return "", "", err
	}
	// Snapshot the IDs that exist before warmup so a failed warmup can tell
	// pre-existing testboxes from ones it may have created.
	beforeWarmup := b.listIDsBestEffort(ctx)
	result, err := b.runCommand(ctx, args, b.rt.Stdout, b.rt.Stderr)
	output := result.Stdout + result.Stderr
	if err != nil {
		b.cleanupFailedWarmup(ctx, beforeWarmup, output)
		return "", "", exit(result.ExitCode, "blacksmith testbox warmup failed: %v", err)
	}
	leaseID := parseBlacksmithID(output)
	if leaseID == "" {
		// NOTE(review): no Stop or cleanupFailedWarmup call on this path —
		// confirm a testbox cannot have been created without printing an id.
		return "", "", exit(5, "blacksmith testbox warmup did not print a tbx_ id")
	}
	if err := moveStoredTestboxKey(pendingID, leaseID); err != nil {
		// Best-effort stop; the key-store error is the one reported.
		_ = b.Stop(ctx, StopRequest{ID: leaseID})
		return "", "", exit(2, "store blacksmith key for %s: %v", leaseID, err)
	}
	// The key now lives under the real lease ID; clean that one up on failure.
	cleanupKeyID = leaseID
	slug := newLeaseSlug(leaseID)
	if err := claimLeaseForRepoProvider(leaseID, slug, blacksmithTestboxProvider, repo.Root, blacksmithIdleTimeout(b.cfg), reclaim); err != nil {
		// Best-effort stop; the claim error is the one reported.
		_ = b.Stop(ctx, StopRequest{ID: leaseID})
		return "", "", err
	}
	// Success: keep the stored key.
	cleanupKeyID = ""
	return leaseID, slug, nil
}
err != nil { + return result.ExitCode + } + return 0 +} + +func (b *blacksmithBackend) commandOutput(ctx context.Context, args []string) (string, error) { + result, err := b.runCommand(ctx, args, nil, nil) + if err != nil { + return "", ExitError{Code: result.ExitCode, Message: fmt.Sprintf("blacksmith failed: %v: %s", err, strings.TrimSpace(result.Stdout+result.Stderr))} + } + return result.Stdout + result.Stderr, nil +} + +func (b *blacksmithBackend) runCommand(ctx context.Context, args []string, stdout, stderr io.Writer) (LocalCommandResult, error) { + result, err := b.rt.Exec.Run(ctx, LocalCommandRequest{Name: "blacksmith", Args: args, Stdout: stdout, Stderr: stderr}) + if err != nil { + return result, ExitError{Code: result.ExitCode, Message: fmt.Sprintf("blacksmith failed: %v", err)} + } + return result, nil +} + +func (b *blacksmithBackend) listIDsBestEffort(ctx context.Context) map[string]bool { + out, err := b.commandOutput(ctx, blacksmithListAllArgs(b.cfg)) + if err != nil { + return map[string]bool{} + } + ids := map[string]bool{} + for _, item := range parseBlacksmithList(out) { + ids[item.ID] = true + } + return ids +} + +func (b *blacksmithBackend) cleanupFailedWarmup(ctx context.Context, before map[string]bool, output string) { + if leaseID := parseBlacksmithID(output); leaseID != "" { + if err := b.Stop(ctx, StopRequest{ID: leaseID}); err == nil { + before[leaseID] = true + } + } + stoppedAny := false + quietAttempts := 0 + for attempt := 0; attempt < blacksmithCleanupAttempts; attempt++ { + if attempt > 0 { + select { + case <-ctx.Done(): + return + case <-time.After(blacksmithCleanupDelay): + } + } + list, err := b.commandOutput(ctx, blacksmithListAllArgs(b.cfg)) + if err != nil { + return + } + stopped := false + for _, item := range parseBlacksmithList(list) { + if before[item.ID] || !blacksmithMatchesConfig(item, b.cfg) { + continue + } + _ = b.Stop(ctx, StopRequest{ID: item.ID}) + before[item.ID] = true + stopped = true + } + if stopped { + 
stoppedAny = true + quietAttempts = 0 + continue + } + if stoppedAny { + quietAttempts++ + if quietAttempts >= blacksmithCleanupQuiet { + return + } + } + } +} + +func (b *blacksmithBackend) blacksmithStatusView(ctx context.Context, leaseID string) (statusView, error) { + out, err := b.commandOutput(ctx, blacksmithListAllArgs(b.cfg)) + if err != nil { + return statusView{}, err + } + for _, item := range parseBlacksmithList(out) { + if item.ID != leaseID { + continue + } + server := blacksmithItemToServer(item) + return statusView{ + ID: item.ID, + Provider: blacksmithTestboxProvider, + TargetOS: targetLinux, + State: item.Status, + ServerID: item.ID, + ServerType: "testbox", + Labels: server.Labels, + HasHost: false, + Ready: strings.EqualFold(item.Status, "ready") || strings.EqualFold(item.Status, "running"), + IdleTimeout: blacksmithIdleTimeout(b.cfg).String(), + }, nil + } + return statusView{}, exit(4, "blacksmith testbox not found: %s", leaseID) +} + +func blacksmithItemToServer(item blacksmithListItem) Server { + labels := map[string]string{ + "lease": item.ID, + "provider": blacksmithTestboxProvider, + "state": item.Status, + "repo": item.Repo, + "workflow": item.Workflow, + "job": item.Job, + "ref": item.Ref, + "created": item.Created, + } + server := Server{ + CloudID: item.ID, + Provider: blacksmithTestboxProvider, + Name: item.ID, + Status: item.Status, + Labels: labels, + } + server.ServerType.Name = "testbox" + return server +} diff --git a/internal/cli/provider_coordinator.go b/internal/cli/provider_coordinator.go new file mode 100644 index 0000000..8dbe09d --- /dev/null +++ b/internal/cli/provider_coordinator.go @@ -0,0 +1,165 @@ +package cli + +import ( + "context" + "fmt" + "strings" +) + +type coordinatorLeaseBackend struct { + spec ProviderSpec + cfg Config + direct SSHLeaseBackend + coord *CoordinatorClient + rt Runtime +} + +func (b *coordinatorLeaseBackend) Spec() ProviderSpec { return b.spec } + +func (b *coordinatorLeaseBackend) Acquire(ctx 
// acquireOnce makes a single attempt to obtain a lease through the
// coordinator and returns it as a LeaseTarget carrying b.coord.
//
// A local lease ID, slug, and SSH key are generated first and sent with the
// create request. The coordinator may answer with a different lease ID, in
// which case the stored key is moved to it. If the lease fails capability
// validation or the desktop bootstrap step, the lease is released before the
// error is returned.
func (b *coordinatorLeaseBackend) acquireOnce(ctx context.Context, keep bool) (LeaseTarget, error) {
	leaseID := newLeaseID()
	slug := newLeaseSlug(leaseID)
	keyPath, publicKey, err := ensureTestboxKeyForConfig(b.cfg, leaseID)
	if err != nil {
		return LeaseTarget{}, err
	}
	// Work on a copy so per-lease values never leak into the backend config.
	cfg := b.cfg
	cfg.SSHKey = keyPath
	cfg.ProviderKey = providerKeyForLease(leaseID)
	if cfg.Tailscale.Enabled && cfg.Tailscale.Hostname == "" {
		cfg.Tailscale.Hostname = renderTailscaleHostname(cfg.Tailscale.HostnameTemplate, leaseID, slug, cfg.Provider)
	}
	ensureAWSSSHCIDRs(ctx, &cfg)
	fmt.Fprintf(b.rt.Stderr, "coordinator lease class=%s preferred_type=%s keep=%v slug=%s idle_timeout=%s ttl=%s\n", cfg.Class, cfg.ServerType, keep, slug, cfg.IdleTimeout, cfg.TTL)
	lease, err := b.coord.CreateLease(ctx, cfg, publicKey, keep, leaseID, slug)
	if err != nil {
		return LeaseTarget{}, err
	}
	if lease.ID != "" && lease.ID != leaseID {
		// The coordinator chose its own ID; failing to rename the local key is
		// only warned about, not treated as fatal.
		if err := moveStoredTestboxKey(leaseID, lease.ID); err != nil {
			fmt.Fprintf(b.rt.Stderr, "warning: could not move local key from %s to %s: %v\n", leaseID, lease.ID, err)
		}
	}
	if err := validateCoordinatorLeaseCapabilities(cfg, lease); err != nil {
		// Release with a background context so the release is still attempted
		// when ctx has already been cancelled.
		if releaseErr := releaseCoordinatorLease(context.Background(), b.coord, blank(lease.ID, leaseID)); releaseErr != nil {
			fmt.Fprintf(b.rt.Stderr, "warning: release failed after capability mismatch for %s: %v\n", blank(lease.ID, leaseID), releaseErr)
		}
		return LeaseTarget{}, err
	}
	// From here on leaseID is the ID derived from the coordinator's lease.
	server, target, leaseID := leaseToServerTarget(lease, cfg)
	fmt.Fprintf(b.rt.Stderr, "leased %s slug=%s server=%d type=%s ip=%s via coordinator\n", leaseID, blank(lease.Slug, "-"), server.ID, server.ServerType.Name, target.Host)
	if summary := coordinatorFallbackSummary(lease); summary != "" {
		fmt.Fprintf(b.rt.Stderr, "fallback resolved %s\n", summary)
	}
	// waitCtx scopes the heartbeat, the lease watch, and the bootstrap step;
	// all three are torn down by the defers when this function returns.
	// NOTE(review): the watcher is handed cancelWait, presumably to abort the
	// bootstrap when the lease disappears — confirm in startCoordinatorLeaseWatch.
	waitCtx, cancelWait := context.WithCancelCause(ctx)
	defer cancelWait(nil)
	stopHeartbeat := startCoordinatorHeartbeat(waitCtx, b.coord, leaseID, cfg.IdleTimeout, nil, b.rt.Stderr)
	defer stopHeartbeat()
	stopLeaseWatch := startCoordinatorLeaseWatch(waitCtx, b.coord, leaseID, cancelWait, b.rt.Stderr)
	defer stopLeaseWatch()
	if err := bootstrapAWSWindowsDesktop(waitCtx, cfg, &target, publicKey, b.rt.Stderr); err != nil {
		if releaseErr := releaseCoordinatorLease(context.Background(), b.coord, leaseID); releaseErr != nil {
			fmt.Fprintf(b.rt.Stderr, "warning: release failed after bootstrap error for %s: %v\n", leaseID, releaseErr)
		}
		return LeaseTarget{}, err
	}
	return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID, Coordinator: b.coord}, nil
}
cfg.CoordAdminToken + coord, _, err := newCoordinatorClient(cfg) + if err != nil { + return nil, nil, err + } + machines, err := coord.Pool(ctx, cfg) + if err != nil { + return nil, nil, err + } + activeLeases, err := coord.AdminLeases(ctx, "active", "", "", 1000) + if err != nil { + fmt.Fprintf(b.rt.Stderr, "warning: active lease lookup failed; orphan status unavailable: %v\n", err) + return machines, nil, nil + } + return machines, activeCoordinatorLeaseIDs(activeLeases), nil +} + +func (b *coordinatorLeaseBackend) ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error { + if req.Lease.LeaseID == "" { + return exit(2, "missing coordinator lease id") + } + if err := releaseCoordinatorLease(ctx, b.coord, req.Lease.LeaseID); err != nil { + return err + } + removeLeaseClaim(req.Lease.LeaseID) + return nil +} + +func (b *coordinatorLeaseBackend) Touch(ctx context.Context, req TouchRequest) (Server, error) { + lease, err := b.coord.TouchLease(ctx, req.Lease.LeaseID) + if err != nil { + return req.Lease.Server, err + } + server, _, _ := leaseToServerTarget(lease, b.cfg) + return server, nil +} + +func coordinatorMachinesToServers(machines []CoordinatorMachine, activeLeaseIDs map[string]struct{}) []Server { + servers := make([]Server, 0, len(machines)) + for _, machine := range machines { + labels := map[string]string{} + for k, v := range machine.Labels { + labels[k] = v + } + if activeLeaseIDs != nil { + labels["orphan"] = strings.TrimSpace(coordinatorMachineOrphanField(labels, activeLeaseIDs)) + } + server := Server{ + CloudID: string(machine.ID), + Provider: machine.Provider, + Name: machine.Name, + Status: machine.Status, + Labels: labels, + } + server.ServerType.Name = machine.ServerType + server.PublicNet.IPv4.IP = machine.Host + servers = append(servers, server) + } + return servers +} diff --git a/internal/cli/provider_hetzner.go b/internal/cli/provider_hetzner.go new file mode 100644 index 0000000..e6c5914 --- /dev/null +++ 
b/internal/cli/provider_hetzner.go @@ -0,0 +1,124 @@ +package cli + +import ( + "context" + "fmt" +) + +type hetznerLeaseBackend struct{ directSSHBackend } + +func (b *hetznerLeaseBackend) Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) { + return acquireAttemptsRetry(b.rt, req.Keep, func() (LeaseTarget, error) { + return b.acquireOnce(ctx, req.Keep) + }) +} + +func (b *hetznerLeaseBackend) acquireOnce(ctx context.Context, keep bool) (LeaseTarget, error) { + if b.cfg.Tailscale.Enabled && b.cfg.Tailscale.AuthKey == "" { + return LeaseTarget{}, exit(2, "direct --tailscale requires %s to contain a Tailscale auth key; brokered mode uses coordinator OAuth secrets", b.cfg.Tailscale.AuthKeyEnv) + } + client, err := newHetznerClient() + if err != nil { + return LeaseTarget{}, err + } + leaseID := newLeaseID() + servers, err := client.ListCrabboxServers(ctx) + if err != nil { + return LeaseTarget{}, err + } + slug := allocateDirectLeaseSlug(leaseID, servers) + cfg := b.cfg + keyPath, publicKey, err := ensureTestboxKeyForConfig(cfg, leaseID) + if err != nil { + return LeaseTarget{}, err + } + cfg.SSHKey = keyPath + cfg.ProviderKey = providerKeyForLease(leaseID) + if cfg.ProviderKey != "" { + providerKey, err := client.EnsureSSHKey(ctx, cfg.ProviderKey, publicKey) + if err != nil { + return LeaseTarget{}, err + } + cfg.ProviderKey = providerKey.Name + } + fmt.Fprintf(b.rt.Stderr, "provisioning provider=hetzner lease=%s slug=%s class=%s preferred_type=%s location=%s keep=%v\n", leaseID, slug, cfg.Class, cfg.ServerType, cfg.Location, keep) + server, cfg, err := client.CreateServerWithFallback(ctx, cfg, publicKey, leaseID, slug, keep, func(format string, args ...any) { + fmt.Fprintf(b.rt.Stderr, format, args...) 
+ }) + if err != nil { + return LeaseTarget{}, err + } + fmt.Fprintf(b.rt.Stderr, "provisioned lease=%s server=%d type=%s\n", leaseID, server.ID, cfg.ServerType) + server, err = waitForServerIP(ctx, client, server.ID) + if err != nil { + return LeaseTarget{}, err + } + target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) + if err := waitForSSHReady(ctx, &target, b.rt.Stderr, "bootstrap", bootstrapWaitTimeout(cfg)); err != nil { + _ = deleteServer(context.Background(), cfg, server) + return LeaseTarget{}, err + } + server.Labels["state"] = "ready" + if err := client.SetLabels(ctx, server.ID, server.Labels); err != nil { + fmt.Fprintf(b.rt.Stderr, "warning: set labels: %v\n", err) + } + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil +} + +func (b *hetznerLeaseBackend) Resolve(ctx context.Context, req ResolveRequest) (LeaseTarget, error) { + client, err := newHetznerClient() + if err != nil { + return LeaseTarget{}, err + } + if serverID, ok := parseServerID(req.ID); ok { + server, err := client.GetServer(ctx, serverID) + if err != nil { + return LeaseTarget{}, err + } + leaseID := blank(server.Labels["lease"], req.ID) + target := sshTargetFromConfig(b.cfg, server.PublicNet.IPv4.IP) + useStoredTestboxKey(&target, leaseID) + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil + } + servers, err := client.ListCrabboxServers(ctx) + if err != nil { + return LeaseTarget{}, err + } + if server, leaseID, err := findServerByAlias(servers, req.ID); err != nil { + return LeaseTarget{}, err + } else if leaseID != "" { + target := sshTargetFromConfig(b.cfg, server.PublicNet.IPv4.IP) + useStoredTestboxKey(&target, leaseID) + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil + } + return LeaseTarget{}, exit(4, "lease/server not found: %s", req.ID) +} + +func (b *hetznerLeaseBackend) List(ctx context.Context, req ListRequest) ([]LeaseView, error) { + _ = req + client, err := newHetznerClient() + if err != nil { 
+ return nil, err + } + return client.ListCrabboxServers(ctx) +} + +func (b *hetznerLeaseBackend) ReleaseLease(ctx context.Context, req ReleaseLeaseRequest) error { + if err := deleteServer(ctx, b.cfg, req.Lease.Server); err != nil { + return err + } + removeLeaseClaim(req.Lease.LeaseID) + return nil +} + +func (b *hetznerLeaseBackend) Touch(ctx context.Context, req TouchRequest) (Server, error) { + return b.touch(ctx, req.Lease.Server, req.State), nil +} + +func (b *hetznerLeaseBackend) Cleanup(ctx context.Context, req CleanupRequest) error { + servers, err := b.List(ctx, ListRequest{Options: req.Options}) + if err != nil { + return err + } + return b.cleanupServers(ctx, req, servers) +} diff --git a/internal/cli/provider_static.go b/internal/cli/provider_static.go new file mode 100644 index 0000000..de29ec1 --- /dev/null +++ b/internal/cli/provider_static.go @@ -0,0 +1,60 @@ +package cli + +import ( + "context" + "fmt" + "time" +) + +type staticLeaseBackend struct{ directSSHBackend } + +func (b *staticLeaseBackend) Acquire(ctx context.Context, req AcquireRequest) (LeaseTarget, error) { + server, target, leaseID, err := staticLease(b.cfg) + if err != nil { + return LeaseTarget{}, err + } + fmt.Fprintf(b.rt.Stderr, "using static target lease=%s slug=%s target=%s windows_mode=%s host=%s keep=%v\n", leaseID, serverSlug(server), b.cfg.TargetOS, b.cfg.WindowsMode, target.Host, req.Keep) + if err := waitForSSH(ctx, &target, b.rt.Stderr); err != nil { + return LeaseTarget{}, err + } + server.Labels["state"] = "ready" + return LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, nil +} + +func (b *staticLeaseBackend) Resolve(_ context.Context, req ResolveRequest) (LeaseTarget, error) { + server, target, leaseID, err := staticLease(b.cfg) + if err != nil { + return LeaseTarget{}, err + } + if req.ID == "" || req.ID == leaseID || req.ID == server.Name || req.ID == serverSlug(server) || req.ID == b.cfg.Static.Host { + return LeaseTarget{Server: server, SSH: target, 
LeaseID: leaseID}, nil + } + return LeaseTarget{}, exit(4, "static lease not found: %s", req.ID) +} + +func (b *staticLeaseBackend) List(_ context.Context, req ListRequest) ([]LeaseView, error) { + _ = req + server, _, _, err := staticLease(b.cfg) + if err != nil { + return nil, err + } + return []LeaseView{server}, nil +} + +func (b *staticLeaseBackend) ReleaseLease(_ context.Context, req ReleaseLeaseRequest) error { + removeLeaseClaim(req.Lease.LeaseID) + return nil +} + +func (b *staticLeaseBackend) Touch(_ context.Context, req TouchRequest) (Server, error) { + server := req.Lease.Server + if server.Labels == nil { + server.Labels = map[string]string{} + } + server.Labels = touchDirectLeaseLabels(server.Labels, b.cfg, req.State, time.Now().UTC()) + return server, nil +} + +func (b *staticLeaseBackend) Cleanup(context.Context, CleanupRequest) error { + return exit(2, "machine cleanup is not supported for provider=%s", b.cfg.Provider) +} diff --git a/internal/cli/providers_builtin_test.go b/internal/cli/providers_builtin_test.go new file mode 100644 index 0000000..4573dd5 --- /dev/null +++ b/internal/cli/providers_builtin_test.go @@ -0,0 +1,110 @@ +package cli + +import "flag" + +func init() { + RegisterProvider(testHetznerProvider{}) + RegisterProvider(testAWSProvider{}) + RegisterProvider(testStaticSSHProvider{}) + RegisterProvider(testBlacksmithProvider{}) +} + +type testHetznerProvider struct{} + +func (testHetznerProvider) Name() string { return "hetzner" } +func (testHetznerProvider) Aliases() []string { return nil } +func (testHetznerProvider) Spec() ProviderSpec { + return ProviderSpec{ + Name: "hetzner", + Kind: ProviderKindSSHLease, + Targets: []TargetSpec{{OS: targetLinux}}, + Features: FeatureSet{FeatureSSH, FeatureCrabboxSync, FeatureCleanup, FeatureDesktop, FeatureBrowser, FeatureCode, FeatureTailscale}, + Coordinator: CoordinatorSupported, + } +} +func (testHetznerProvider) RegisterFlags(*flag.FlagSet, Config) any { return noProviderFlags{} } +func 
(testHetznerProvider) ApplyFlags(*Config, *flag.FlagSet, any) error { + return nil +} +func (p testHetznerProvider) Configure(cfg Config, rt Runtime) (Backend, error) { + return NewHetznerLeaseBackend(p.Spec(), cfg, rt), nil +} + +type testAWSProvider struct{} + +func (testAWSProvider) Name() string { return "aws" } +func (testAWSProvider) Aliases() []string { return nil } +func (testAWSProvider) Spec() ProviderSpec { + return ProviderSpec{ + Name: "aws", + Kind: ProviderKindSSHLease, + Targets: []TargetSpec{ + {OS: targetLinux}, + {OS: targetWindows, WindowsMode: windowsModeNormal}, + {OS: targetWindows, WindowsMode: windowsModeWSL2}, + {OS: targetMacOS}, + }, + Features: FeatureSet{FeatureSSH, FeatureCrabboxSync, FeatureCleanup, FeatureDesktop, FeatureBrowser, FeatureCode}, + Coordinator: CoordinatorSupported, + } +} +func (testAWSProvider) RegisterFlags(*flag.FlagSet, Config) any { return noProviderFlags{} } +func (testAWSProvider) ApplyFlags(*Config, *flag.FlagSet, any) error { + return nil +} +func (p testAWSProvider) Configure(cfg Config, rt Runtime) (Backend, error) { + return NewAWSLeaseBackend(p.Spec(), cfg, rt), nil +} + +type testStaticSSHProvider struct{} + +func (testStaticSSHProvider) Name() string { return staticProvider } +func (testStaticSSHProvider) Aliases() []string { + return []string{"static", "static-ssh"} +} +func (testStaticSSHProvider) Spec() ProviderSpec { + return ProviderSpec{ + Name: staticProvider, + Kind: ProviderKindSSHLease, + Targets: []TargetSpec{ + {OS: targetLinux}, + {OS: targetWindows, WindowsMode: windowsModeNormal}, + {OS: targetWindows, WindowsMode: windowsModeWSL2}, + {OS: targetMacOS}, + }, + Features: FeatureSet{FeatureSSH, FeatureCrabboxSync, FeatureDesktop, FeatureBrowser, FeatureCode}, + Coordinator: CoordinatorNever, + } +} +func (testStaticSSHProvider) RegisterFlags(*flag.FlagSet, Config) any { return noProviderFlags{} } +func (testStaticSSHProvider) ApplyFlags(*Config, *flag.FlagSet, any) error { + return nil +} 
+func (p testStaticSSHProvider) Configure(cfg Config, rt Runtime) (Backend, error) { + return NewStaticSSHLeaseBackend(p.Spec(), cfg, rt), nil +} + +type testBlacksmithProvider struct{} + +func (testBlacksmithProvider) Name() string { return blacksmithTestboxProvider } +func (testBlacksmithProvider) Aliases() []string { + return []string{"blacksmith"} +} +func (testBlacksmithProvider) Spec() ProviderSpec { + return ProviderSpec{ + Name: blacksmithTestboxProvider, + Kind: ProviderKindDelegatedRun, + Targets: []TargetSpec{{OS: targetLinux}}, + Features: nil, + Coordinator: CoordinatorNever, + } +} +func (testBlacksmithProvider) RegisterFlags(fs *flag.FlagSet, defaults Config) any { + return RegisterBlacksmithProviderFlags(fs, defaults) +} +func (testBlacksmithProvider) ApplyFlags(cfg *Config, fs *flag.FlagSet, values any) error { + return ApplyBlacksmithProviderFlags(cfg, fs, values) +} +func (p testBlacksmithProvider) Configure(cfg Config, rt Runtime) (Backend, error) { + return NewBlacksmithBackend(p.Spec(), cfg, rt), nil +} diff --git a/internal/cli/providers_common.go b/internal/cli/providers_common.go new file mode 100644 index 0000000..e7c2981 --- /dev/null +++ b/internal/cli/providers_common.go @@ -0,0 +1,135 @@ +package cli + +import ( + "context" + "fmt" + "io" + "strings" + "time" +) + +type noProviderFlags struct{} + +func NoProviderFlags() any { return noProviderFlags{} } + +func NewHetznerLeaseBackend(spec ProviderSpec, cfg Config, rt Runtime) Backend { + cfg.Provider = "hetzner" + return &hetznerLeaseBackend{directSSHBackend: directSSHBackend{spec: spec, cfg: cfg, rt: rt}} +} + +func NewAWSLeaseBackend(spec ProviderSpec, cfg Config, rt Runtime) Backend { + cfg.Provider = "aws" + return &awsLeaseBackend{directSSHBackend: directSSHBackend{spec: spec, cfg: cfg, rt: rt}} +} + +func NewStaticSSHLeaseBackend(spec ProviderSpec, cfg Config, rt Runtime) Backend { + cfg.Provider = staticProvider + return &staticLeaseBackend{directSSHBackend: 
directSSHBackend{spec: spec, cfg: cfg, rt: rt}} +} + +type directSSHBackend struct { + spec ProviderSpec + cfg Config + rt Runtime +} + +func (b *directSSHBackend) Spec() ProviderSpec { return b.spec } + +func (b *directSSHBackend) cleanupServers(ctx context.Context, req CleanupRequest, servers []Server) error { + _ = ctx + _ = req + for _, s := range servers { + shouldDelete, reason := shouldCleanupServer(s, time.Now().UTC()) + if !shouldDelete { + fmt.Fprintf(b.rt.Stderr, "skip server id=%s name=%s reason=%s\n", s.DisplayID(), s.Name, reason) + continue + } + fmt.Fprintf(b.rt.Stderr, "delete server id=%s name=%s\n", s.DisplayID(), s.Name) + if !req.DryRun { + if err := deleteServer(ctx, b.cfg, s); err != nil { + return err + } + } + } + return nil +} + +func (b *directSSHBackend) touch(ctx context.Context, server Server, state string) Server { + return touchDirectLeaseBestEffort(ctx, b.cfg, server, state, b.rt.Stderr) +} + +func touchDirectLeaseBestEffort(ctx context.Context, cfg Config, server Server, state string, stderr io.Writer) Server { + if server.Labels == nil { + server.Labels = map[string]string{} + } + server.Labels = touchDirectLeaseLabels(server.Labels, cfg, state, time.Now().UTC()) + if isStaticProvider(cfg.Provider) || server.Provider == staticProvider { + return server + } + if cfg.Provider == "aws" || server.Provider == "aws" || strings.HasPrefix(server.CloudID, "i-") { + client, err := newAWSClient(ctx, cfg) + if err != nil { + fmt.Fprintf(stderr, "warning: direct touch state=%s: %v\n", state, err) + return server + } + if err := client.SetTags(ctx, server.CloudID, server.Labels); err != nil { + fmt.Fprintf(stderr, "warning: direct touch state=%s: %v\n", state, err) + } + return server + } + client, err := newHetznerClient() + if err != nil { + fmt.Fprintf(stderr, "warning: direct touch state=%s: %v\n", state, err) + return server + } + if err := client.SetLabels(ctx, server.ID, server.Labels); err != nil { + fmt.Fprintf(stderr, "warning: direct 
touch state=%s: %v\n", state, err) + } + return server +} + +func chooseAWSRegion(ctx context.Context, cfg Config, stderr io.Writer) Config { + if cfg.Provider != "aws" || cfg.Capacity.Market != "spot" || len(cfg.Capacity.Regions) < 2 { + return cfg + } + client, err := newAWSClient(ctx, cfg) + if err != nil { + fmt.Fprintf(stderr, "warning: spot placement score unavailable: %v\n", err) + return cfg + } + scores, err := client.SpotPlacementScores(ctx, cfg) + if err != nil { + fmt.Fprintf(stderr, "warning: spot placement score unavailable: %v\n", err) + return cfg + } + if len(scores) == 0 { + return cfg + } + best := awsString(scores[0].Region) + score := int32(0) + if scores[0].Score != nil { + score = *scores[0].Score + } + if best != "" && best != cfg.AWSRegion { + fmt.Fprintf(stderr, "selected aws region=%s spot_score=%d previous=%s\n", best, score, cfg.AWSRegion) + cfg.AWSRegion = best + } + return cfg +} + +func acquireAttemptsRetry(rt Runtime, keep bool, acquire func() (LeaseTarget, error)) (LeaseTarget, error) { + var lastErr error + attempts := acquireAttempts(keep) + for attempt := 1; attempt <= attempts; attempt++ { + lease, err := acquire() + if err == nil { + return lease, nil + } + lastErr = err + if attempt == attempts || !isBootstrapWaitError(err) { + return LeaseTarget{}, err + } + fmt.Fprintf(rt.Stderr, "warning: bootstrap failed; retrying with fresh lease: %v\n", err) + } + return LeaseTarget{}, lastErr +} diff --git a/internal/cli/run.go b/internal/cli/run.go index 97a84c7..2bcda28 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -60,42 +60,42 @@ func (a App) warmup(ctx context.Context, args []string) error { if err != nil { return err } - if isBlacksmithProvider(cfg.Provider) { - if *actionsRunner { - return exit(2, "--actions-runner is not supported for provider=%s; Blacksmith owns runner hydration", cfg.Provider) + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { + return err + } + options := 
leaseOptionsFromConfig(cfg) + if delegated, ok := backend.(DelegatedRunBackend); ok { + return delegated.Warmup(ctx, WarmupRequest{Repo: repo, Options: options, Keep: *keep, Reclaim: *reclaim, ActionsRunner: *actionsRunner, TimingJSON: *timingJSON}) + } + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + return exit(2, "provider=%s does not support warmup", backend.Spec().Name) + } + if *actionsRunner { + if err := validateActionsRunnerCapability(backend, cfg); err != nil { + return err } - return a.blacksmithWarmup(ctx, cfg, repo, *keep, *reclaim, *timingJSON) - } - - coord, useCoordinator, err := newTargetCoordinatorClient(cfg) - if err != nil { - return err - } - var server Server - var target SSHTarget - var leaseID string - if useCoordinator { - server, target, leaseID, err = a.acquireCoordinatorWithRetry(ctx, cfg, coord, *keep) - } else { - server, target, leaseID, err = a.acquireWithRetry(ctx, cfg, *keep) } + lease, err := sshBackend.Acquire(ctx, AcquireRequest{Repo: repo, Options: options, Keep: *keep, Reclaim: *reclaim}) if err != nil { return err } + server, target, leaseID := lease.Server, lease.SSH, lease.LeaseID applyResolvedServerConfig(&cfg, server) if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { - a.releaseAcquiredLeaseBestEffort(ctx, cfg, coord, useCoordinator, server, target, leaseID) + a.releaseBackendLeaseBestEffort(ctx, sshBackend, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID, Coordinator: lease.Coordinator}) return err } if serverTailscaleMetadata(server).Enabled { if err := waitForSSHReady(ctx, &target, a.Stderr, "tailscale metadata", 2*time.Minute); err == nil { - a.refreshTailscaleMetadata(ctx, cfg, coord, useCoordinator, &server, target, leaseID) + a.refreshTailscaleMetadata(ctx, cfg, lease.Coordinator, lease.Coordinator != nil, &server, target, leaseID) } else { fmt.Fprintf(a.Stderr, "warning: tailscale metadata wait failed: %v\n", err) } } if 
resolved, err := resolveNetworkTarget(ctx, cfg, server, target); err != nil { - a.releaseAcquiredLeaseBestEffort(ctx, cfg, coord, useCoordinator, server, target, leaseID) + a.releaseBackendLeaseBestEffort(ctx, sshBackend, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID, Coordinator: lease.Coordinator}) return err } else { target = resolved.Target @@ -182,28 +182,40 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { if err != nil { return err } - if isBlacksmithProvider(cfg.Provider) { - return a.blacksmithRun(ctx, cfg, repo, blacksmithRunOptions{ - ID: *leaseIDFlag, - Keep: *keep, - Reclaim: *reclaim, - SyncOnly: *syncOnly, - Debug: *debugSync, - ShellMode: *shellMode, - Command: command, - IdleTimeout: cfg.IdleTimeout, - TimingJSON: *timingJSON, + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { + return err + } + options := leaseOptionsFromConfig(cfg) + if delegated, ok := backend.(DelegatedRunBackend); ok { + _, err := delegated.Run(ctx, RunRequest{ + Repo: repo, + ID: *leaseIDFlag, + Options: options, + Keep: *keep, + Reclaim: *reclaim, + NoSync: *noSync, + SyncOnly: *syncOnly, + DebugSync: *debugSync, + ShellMode: *shellMode, + ChecksumSync: *checksumSync, + ForceSyncLarge: *forceSyncLarge, + Command: command, + TimingJSON: *timingJSON, }) + return err + } + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + return exit(2, "provider=%s does not support run", backend.Spec().Name) } var server Server var target SSHTarget var leaseID string acquired := false - coord, useCoordinator, err := newTargetCoordinatorClient(cfg) - if err != nil { - return err - } + coord := backendCoordinator(backend) + useCoordinator := coord != nil recorder := &runRecorder{} var runFailure error recordFailure := func(failure error) error { @@ -217,48 +229,35 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { recorder.Event("leasing.started", "leasing", "") } if *leaseIDFlag != "" { - if 
useCoordinator { - var lease CoordinatorLease - lease, err = coord.GetLease(ctx, *leaseIDFlag) - if err == nil { - server, target, leaseID = leaseToServerTarget(lease, cfg) - if resolved, resolveErr := resolveNetworkTarget(ctx, cfg, server, target); resolveErr != nil { - err = resolveErr - } else { - target = resolved.Target - if resolved.FallbackReason != "" { - fmt.Fprintf(a.Stderr, "network fallback %s\n", resolved.FallbackReason) - } - } - if !flagWasSet(fs, "idle-timeout") && lease.IdleTimeoutSeconds > 0 { - cfg.IdleTimeout = time.Duration(lease.IdleTimeoutSeconds) * time.Second - } - } - } else { - server, target, leaseID, err = a.findLease(ctx, cfg, *leaseIDFlag) - if err == nil { - if resolved, resolveErr := resolveNetworkTarget(ctx, cfg, server, target); resolveErr != nil { - err = resolveErr - } else { - target = resolved.Target - if resolved.FallbackReason != "" { - fmt.Fprintf(a.Stderr, "network fallback %s\n", resolved.FallbackReason) - } - } - } - if err == nil && !flagWasSet(fs, "idle-timeout") { - if duration, ok := parseDurationSecondsLabel(server.Labels["idle_timeout_secs"]); ok { - cfg.IdleTimeout = duration - } else if duration, ok := parseDurationSecondsLabel(server.Labels["idle_timeout"]); ok { - cfg.IdleTimeout = duration + var lease LeaseTarget + lease, err = sshBackend.Resolve(ctx, ResolveRequest{Repo: repo, Options: options, ID: *leaseIDFlag, Reclaim: *reclaim}) + if err == nil { + server, target, leaseID = lease.Server, lease.SSH, lease.LeaseID + if resolved, resolveErr := resolveNetworkTarget(ctx, cfg, server, target); resolveErr != nil { + err = resolveErr + } else { + target = resolved.Target + if resolved.FallbackReason != "" { + fmt.Fprintf(a.Stderr, "network fallback %s\n", resolved.FallbackReason) } } } + if err == nil && !flagWasSet(fs, "idle-timeout") { + if useCoordinator { + if duration, ok := parseDurationSecondsLabel(server.Labels["idle_timeout_secs"]); ok { + cfg.IdleTimeout = duration + } + } else if duration, ok := 
parseDurationSecondsLabel(server.Labels["idle_timeout_secs"]); ok { + cfg.IdleTimeout = duration + } else if duration, ok := parseDurationSecondsLabel(server.Labels["idle_timeout"]); ok { + cfg.IdleTimeout = duration + } + } } else { - if useCoordinator { - server, target, leaseID, err = a.acquireCoordinatorWithRetry(ctx, cfg, coord, *keep) - } else { - server, target, leaseID, err = a.acquireWithRetry(ctx, cfg, *keep) + var lease LeaseTarget + lease, err = sshBackend.Acquire(ctx, AcquireRequest{Repo: repo, Options: options, Keep: *keep, Reclaim: *reclaim}) + if err == nil { + server, target, leaseID = lease.Server, lease.SSH, lease.LeaseID } acquired = true } @@ -274,17 +273,21 @@ func (a App) runCommand(ctx context.Context, args []string) (err error) { } if err := claimLeaseForRepoConfig(leaseID, serverSlug(server), cfg, repo.Root, cfg.IdleTimeout, *reclaim); err != nil { if acquired && !*keep { - a.releaseAcquiredLeaseBestEffort(ctx, cfg, coord, useCoordinator, server, target, leaseID) + a.releaseBackendLeaseBestEffort(ctx, sshBackend, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID, Coordinator: coord}) } return recordFailure(err) } if !useCoordinator && leaseID != "" { - server = a.touchDirectLeaseBestEffort(ctx, cfg, server, blank(server.Labels["state"], "ready")) + if touched, touchErr := sshBackend.Touch(ctx, TouchRequest{Lease: LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, State: blank(server.Labels["state"], "ready"), IdleTimeout: cfg.IdleTimeout}); touchErr == nil { + server = touched + } else { + fmt.Fprintf(a.Stderr, "warning: direct touch failed for %s: %v\n", leaseID, touchErr) + } } if acquired { defer func() { if !*keep { - a.releaseAcquiredLeaseBestEffort(context.Background(), cfg, coord, useCoordinator, server, target, leaseID) + a.releaseBackendLeaseBestEffort(context.Background(), sshBackend, LeaseTarget{Server: server, SSH: target, LeaseID: leaseID, Coordinator: coord}) recorder.Event("lease.released", "released", "") } 
}() @@ -493,9 +496,17 @@ afterSync: } } if !useCoordinator { - server = a.touchDirectLeaseBestEffort(context.Background(), cfg, server, "running") + if touched, touchErr := sshBackend.Touch(context.Background(), TouchRequest{Lease: LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, State: "running", IdleTimeout: cfg.IdleTimeout}); touchErr == nil { + server = touched + } else { + fmt.Fprintf(a.Stderr, "warning: direct touch state=running: %v\n", touchErr) + } defer func() { - server = a.touchDirectLeaseBestEffort(context.Background(), cfg, server, "ready") + if touched, touchErr := sshBackend.Touch(context.Background(), TouchRequest{Lease: LeaseTarget{Server: server, SSH: target, LeaseID: leaseID}, State: "ready", IdleTimeout: cfg.IdleTimeout}); touchErr == nil { + server = touched + } else { + fmt.Fprintf(a.Stderr, "warning: direct touch state=ready: %v\n", touchErr) + } }() } fmt.Fprintf(a.Stderr, "running on %s %s\n", target.Host, strings.Join(command, " ")) @@ -674,55 +685,6 @@ func shouldUseShell(command []string) bool { return false } -func (a App) acquireCoordinator(ctx context.Context, cfg Config, coord *CoordinatorClient, keep bool) (Server, SSHTarget, string, error) { - leaseID := newLeaseID() - slug := newLeaseSlug(leaseID) - keyPath, publicKey, err := ensureTestboxKeyForConfig(cfg, leaseID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - cfg.SSHKey = keyPath - cfg.ProviderKey = providerKeyForLease(leaseID) - if cfg.Tailscale.Enabled && cfg.Tailscale.Hostname == "" { - cfg.Tailscale.Hostname = renderTailscaleHostname(cfg.Tailscale.HostnameTemplate, leaseID, slug, cfg.Provider) - } - ensureAWSSSHCIDRs(ctx, &cfg) - fmt.Fprintf(a.Stderr, "coordinator lease class=%s preferred_type=%s keep=%v slug=%s idle_timeout=%s ttl=%s\n", cfg.Class, cfg.ServerType, keep, slug, cfg.IdleTimeout, cfg.TTL) - lease, err := coord.CreateLease(ctx, cfg, publicKey, keep, leaseID, slug) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if 
lease.ID != "" && lease.ID != leaseID { - if err := moveStoredTestboxKey(leaseID, lease.ID); err != nil { - fmt.Fprintf(a.Stderr, "warning: could not move local key from %s to %s: %v\n", leaseID, lease.ID, err) - } - } - if err := validateCoordinatorLeaseCapabilities(cfg, lease); err != nil { - if releaseErr := releaseCoordinatorLease(context.Background(), coord, blank(lease.ID, leaseID)); releaseErr != nil { - fmt.Fprintf(a.Stderr, "warning: release failed after capability mismatch for %s: %v\n", blank(lease.ID, leaseID), releaseErr) - } - return Server{}, SSHTarget{}, "", err - } - server, target, leaseID := leaseToServerTarget(lease, cfg) - fmt.Fprintf(a.Stderr, "leased %s slug=%s server=%d type=%s ip=%s via coordinator\n", leaseID, blank(lease.Slug, "-"), server.ID, server.ServerType.Name, target.Host) - if summary := coordinatorFallbackSummary(lease); summary != "" { - fmt.Fprintf(a.Stderr, "fallback resolved %s\n", summary) - } - waitCtx, cancelWait := context.WithCancelCause(ctx) - defer cancelWait(nil) - stopHeartbeat := startCoordinatorHeartbeat(waitCtx, coord, leaseID, cfg.IdleTimeout, nil, a.Stderr) - defer stopHeartbeat() - stopLeaseWatch := startCoordinatorLeaseWatch(waitCtx, coord, leaseID, cancelWait, a.Stderr) - defer stopLeaseWatch() - if err := bootstrapAWSWindowsDesktop(waitCtx, cfg, &target, publicKey, a.Stderr); err != nil { - if releaseErr := releaseCoordinatorLease(context.Background(), coord, leaseID); releaseErr != nil { - fmt.Fprintf(a.Stderr, "warning: release failed after bootstrap error for %s: %v\n", leaseID, releaseErr) - } - return Server{}, SSHTarget{}, "", err - } - return server, target, leaseID, nil -} - func validateCoordinatorLeaseCapabilities(cfg Config, lease CoordinatorLease) error { if cfg.Desktop && !lease.Desktop { return exit(5, "coordinator did not provision desktop=true for lease %s; deploy the coordinator with desktop/VNC support", blank(lease.ID, "-")) @@ -769,40 +731,6 @@ func coordinatorFallbackSummary(lease 
CoordinatorLease) string { return fmt.Sprintf("requested_type=%s actual_type=%s attempts=%s", lease.RequestedServerType, lease.ServerType, blank(strings.Join(attempts, ","), "-")) } -func (a App) acquireCoordinatorWithRetry(ctx context.Context, cfg Config, coord *CoordinatorClient, keep bool) (Server, SSHTarget, string, error) { - var lastErr error - attempts := acquireAttempts(keep) - for attempt := 1; attempt <= attempts; attempt++ { - server, target, leaseID, err := a.acquireCoordinator(ctx, cfg, coord, keep) - if err == nil { - return server, target, leaseID, nil - } - lastErr = err - if attempt == attempts || !isBootstrapWaitError(err) { - return Server{}, SSHTarget{}, "", err - } - fmt.Fprintf(a.Stderr, "warning: bootstrap failed; retrying with fresh lease: %v\n", err) - } - return Server{}, SSHTarget{}, "", lastErr -} - -func (a App) acquireWithRetry(ctx context.Context, cfg Config, keep bool) (Server, SSHTarget, string, error) { - var lastErr error - attempts := acquireAttempts(keep) - for attempt := 1; attempt <= attempts; attempt++ { - server, target, leaseID, err := a.acquire(ctx, cfg, keep) - if err == nil { - return server, target, leaseID, nil - } - lastErr = err - if attempt == attempts || !isBootstrapWaitError(err) { - return Server{}, SSHTarget{}, "", err - } - fmt.Fprintf(a.Stderr, "warning: bootstrap failed; retrying with fresh lease: %v\n", err) - } - return Server{}, SSHTarget{}, "", lastErr -} - func acquireAttempts(bool) int { return 2 } @@ -832,17 +760,12 @@ func releaseCoordinatorLease(ctx context.Context, coord *CoordinatorClient, leas return lastErr } -func (a App) releaseAcquiredLeaseBestEffort(ctx context.Context, cfg Config, coord *CoordinatorClient, useCoordinator bool, server Server, target SSHTarget, leaseID string) { - a.writeActionsHydrationStopBestEffort(ctx, target, leaseID) - fmt.Fprintf(a.Stderr, "releasing %s server=%s\n", leaseID, server.DisplayID()) - if useCoordinator { - if err := releaseCoordinatorLease(ctx, coord, 
leaseID); err != nil { - fmt.Fprintf(a.Stderr, "warning: release failed for %s: %v\n", leaseID, err) - } - } else if err := deleteServer(ctx, cfg, server); err != nil { - fmt.Fprintf(a.Stderr, "warning: delete failed for %s: %v\n", leaseID, err) +func (a App) releaseBackendLeaseBestEffort(ctx context.Context, backend SSHLeaseBackend, lease LeaseTarget) { + a.writeActionsHydrationStopBestEffort(ctx, lease.SSH, lease.LeaseID) + fmt.Fprintf(a.Stderr, "releasing %s server=%s\n", lease.LeaseID, lease.Server.DisplayID()) + if err := backend.ReleaseLease(ctx, ReleaseLeaseRequest{Lease: lease, Force: true}); err != nil { + fmt.Fprintf(a.Stderr, "warning: release failed for %s: %v\n", lease.LeaseID, err) } - removeLeaseClaim(leaseID) } func startCoordinatorHeartbeat(ctx context.Context, coord *CoordinatorClient, leaseID string, idleTimeout time.Duration, updateIdleTimeout *time.Duration, stderr io.Writer) func() { @@ -947,189 +870,6 @@ func heartbeatInterval(ttl time.Duration) time.Duration { return interval } -func (a App) touchLeaseBestEffort(ctx context.Context, cfg Config, identifier, leaseID string) { - if _, ok, err := newTargetCoordinatorClient(cfg); err == nil && ok { - if leaseID == "" { - leaseID = identifier - } - a.touchCoordinatorLeaseBestEffort(ctx, cfg, leaseID) - return - } - server, _, _, err := a.findLease(ctx, cfg, identifier) - if err != nil { - fmt.Fprintf(a.Stderr, "warning: direct touch failed for %s: %v\n", identifier, err) - return - } - a.touchDirectLeaseBestEffort(ctx, cfg, server, blank(server.Labels["state"], "ready")) -} - -func (a App) touchActiveLeaseBestEffort(ctx context.Context, cfg Config, server Server, leaseID string) Server { - if _, ok, err := newTargetCoordinatorClient(cfg); err == nil && ok { - a.touchCoordinatorLeaseBestEffort(ctx, cfg, leaseID) - return server - } - return a.touchDirectLeaseBestEffort(ctx, cfg, server, blank(server.Labels["state"], "ready")) -} - -func (a App) touchDirectLeaseBestEffort(ctx context.Context, cfg 
Config, server Server, state string) Server { - if server.Labels == nil { - server.Labels = map[string]string{} - } - server.Labels = touchDirectLeaseLabels(server.Labels, cfg, state, time.Now().UTC()) - if isStaticProvider(cfg.Provider) || server.Provider == staticProvider { - return server - } - if cfg.Provider == "aws" || server.Provider == "aws" || strings.HasPrefix(server.CloudID, "i-") { - client, err := newAWSClient(ctx, cfg) - if err != nil { - fmt.Fprintf(a.Stderr, "warning: direct touch state=%s: %v\n", state, err) - return server - } - if err := client.SetTags(ctx, server.CloudID, server.Labels); err != nil { - fmt.Fprintf(a.Stderr, "warning: direct touch state=%s: %v\n", state, err) - } - return server - } - client, err := newHetznerClient() - if err != nil { - fmt.Fprintf(a.Stderr, "warning: direct touch state=%s: %v\n", state, err) - return server - } - if err := client.SetLabels(ctx, server.ID, server.Labels); err != nil { - fmt.Fprintf(a.Stderr, "warning: direct touch state=%s: %v\n", state, err) - } - return server -} - -func (a App) acquire(ctx context.Context, cfg Config, keep bool) (Server, SSHTarget, string, error) { - if isStaticProvider(cfg.Provider) { - return a.acquireStatic(ctx, cfg, keep) - } - if cfg.Tailscale.Enabled && cfg.Tailscale.AuthKey == "" { - return Server{}, SSHTarget{}, "", exit(2, "direct --tailscale requires %s to contain a Tailscale auth key; brokered mode uses coordinator OAuth secrets", cfg.Tailscale.AuthKeyEnv) - } - if cfg.Provider == "aws" { - return a.acquireAWS(ctx, cfg, keep) - } - client, err := newHetznerClient() - if err != nil { - return Server{}, SSHTarget{}, "", err - } - leaseID := newLeaseID() - servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - slug := allocateDirectLeaseSlug(leaseID, servers) - keyPath, publicKey, err := ensureTestboxKeyForConfig(cfg, leaseID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - cfg.SSHKey = keyPath - 
cfg.ProviderKey = providerKeyForLease(leaseID) - if cfg.ProviderKey != "" { - providerKey, err := client.EnsureSSHKey(ctx, cfg.ProviderKey, publicKey) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - cfg.ProviderKey = providerKey.Name - } - fmt.Fprintf(a.Stderr, "provisioning provider=hetzner lease=%s slug=%s class=%s preferred_type=%s location=%s keep=%v\n", leaseID, slug, cfg.Class, cfg.ServerType, cfg.Location, keep) - server, cfg, err := client.CreateServerWithFallback(ctx, cfg, publicKey, leaseID, slug, keep, func(format string, args ...any) { - fmt.Fprintf(a.Stderr, format, args...) - }) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - fmt.Fprintf(a.Stderr, "provisioned lease=%s server=%d type=%s\n", leaseID, server.ID, cfg.ServerType) - server, err = waitForServerIP(ctx, client, server.ID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - if err := waitForSSHReady(ctx, &target, a.Stderr, "bootstrap", bootstrapWaitTimeout(cfg)); err != nil { - _ = deleteServer(context.Background(), cfg, server) - return Server{}, SSHTarget{}, "", err - } - server.Labels["state"] = "ready" - if err := client.SetLabels(ctx, server.ID, server.Labels); err != nil { - fmt.Fprintf(a.Stderr, "warning: set labels: %v\n", err) - } - return server, target, leaseID, nil -} - -func (a App) acquireAWS(ctx context.Context, cfg Config, keep bool) (Server, SSHTarget, string, error) { - cfg = a.chooseAWSRegion(ctx, cfg) - client, err := newAWSClient(ctx, cfg) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - leaseID := newLeaseID() - servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - slug := allocateDirectLeaseSlug(leaseID, servers) - keyPath, publicKey, err := ensureTestboxKeyForConfig(cfg, leaseID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - cfg.SSHKey = keyPath - cfg.ProviderKey = 
providerKeyForLease(leaseID) - ensureAWSSSHCIDRs(ctx, &cfg) - fmt.Fprintf(a.Stderr, "provisioning provider=aws lease=%s slug=%s class=%s preferred_type=%s region=%s keep=%v market=%s strategy=%s\n", leaseID, slug, cfg.Class, cfg.ServerType, cfg.AWSRegion, keep, cfg.Capacity.Market, cfg.Capacity.Strategy) - server, cfg, err := client.CreateServerWithFallback(ctx, cfg, publicKey, leaseID, slug, keep, func(format string, args ...any) { - fmt.Fprintf(a.Stderr, format, args...) - }) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - fmt.Fprintf(a.Stderr, "provisioned lease=%s server=%s type=%s\n", leaseID, server.DisplayID(), cfg.ServerType) - server, err = client.waitForServerIP(ctx, server.CloudID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - if err := bootstrapAWSWindowsDesktop(ctx, cfg, &target, publicKey, a.Stderr); err != nil { - _ = client.DeleteServer(context.Background(), server.CloudID) - return Server{}, SSHTarget{}, "", err - } - server.Labels["state"] = "ready" - if err := client.SetTags(ctx, server.CloudID, server.Labels); err != nil { - fmt.Fprintf(a.Stderr, "warning: set tags: %v\n", err) - } - return server, target, leaseID, nil -} - -func (a App) chooseAWSRegion(ctx context.Context, cfg Config) Config { - if cfg.Provider != "aws" || cfg.Capacity.Market != "spot" || len(cfg.Capacity.Regions) < 2 { - return cfg - } - client, err := newAWSClient(ctx, cfg) - if err != nil { - fmt.Fprintf(a.Stderr, "warning: spot placement score unavailable: %v\n", err) - return cfg - } - scores, err := client.SpotPlacementScores(ctx, cfg) - if err != nil { - fmt.Fprintf(a.Stderr, "warning: spot placement score unavailable: %v\n", err) - return cfg - } - if len(scores) == 0 { - return cfg - } - best := awsString(scores[0].Region) - score := int32(0) - if scores[0].Score != nil { - score = *scores[0].Score - } - if best != "" && best != cfg.AWSRegion { - fmt.Fprintf(a.Stderr, 
"selected aws region=%s spot_score=%d previous=%s\n", best, score, cfg.AWSRegion) - cfg.AWSRegion = best - } - return cfg -} - func waitForServerIP(ctx context.Context, client *HetznerClient, id int64) (Server, error) { deadline := time.Now().Add(5 * time.Minute) for { @@ -1147,76 +887,6 @@ func waitForServerIP(ctx context.Context, client *HetznerClient, id int64) (Serv } } -func (a App) findLease(ctx context.Context, cfg Config, id string) (Server, SSHTarget, string, error) { - if isStaticProvider(cfg.Provider) { - return a.findStaticLease(ctx, cfg, id) - } - if cfg.Provider == "aws" { - return a.findAWSLease(ctx, cfg, id) - } - client, err := newHetznerClient() - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if serverID, ok := parseServerID(id); ok { - server, err := client.GetServer(ctx, serverID) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - leaseID := server.Labels["lease"] - if leaseID == "" { - leaseID = id - } - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - useStoredTestboxKey(&target, leaseID) - return server, target, leaseID, nil - } - servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if server, leaseID, err := findServerByAlias(servers, id); err != nil { - return Server{}, SSHTarget{}, "", err - } else if leaseID != "" { - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - useStoredTestboxKey(&target, leaseID) - return server, target, leaseID, nil - } - return Server{}, SSHTarget{}, "", exit(4, "lease/server not found: %s", id) -} - -func (a App) findAWSLease(ctx context.Context, cfg Config, id string) (Server, SSHTarget, string, error) { - client, err := newAWSClient(ctx, cfg) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if strings.HasPrefix(id, "i-") { - server, err := client.GetServer(ctx, id) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - leaseID := server.Labels["lease"] - if leaseID == "" { 
- leaseID = id - } - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - useStoredTestboxKey(&target, leaseID) - return server, target, leaseID, nil - } - servers, err := client.ListCrabboxServers(ctx) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if server, leaseID, err := findServerByAlias(servers, id); err != nil { - return Server{}, SSHTarget{}, "", err - } else if leaseID != "" { - target := sshTargetFromConfig(cfg, server.PublicNet.IPv4.IP) - useStoredTestboxKey(&target, leaseID) - return server, target, leaseID, nil - } - return Server{}, SSHTarget{}, "", exit(4, "lease/server not found: %s", id) -} - func findServerByAlias(servers []Server, id string) (Server, string, error) { if isCanonicalLeaseID(id) { for _, server := range servers { @@ -1252,9 +922,11 @@ func findServerByAlias(servers []Server, id string) (Server, string, error) { } func (a App) stop(ctx context.Context, args []string) error { + defaults := defaultConfig() fs := newFlagSet("stop", a.Stderr) - provider := fs.String("provider", defaultConfig().Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox") - targetFlags := registerTargetFlags(fs, defaultConfig()) + provider := fs.String("provider", defaults.Provider, "provider: hetzner, aws, ssh, or blacksmith-testbox") + providerFlags := registerProviderFlags(fs, defaults) + targetFlags := registerTargetFlags(fs, defaults) if err := parseFlags(fs, args); err != nil { return err } @@ -1266,39 +938,47 @@ func (a App) stop(ctx context.Context, args []string) error { return err } cfg.Provider = *provider + if err := applyProviderFlags(&cfg, fs, providerFlags); err != nil { + return err + } if err := applyTargetFlagOverrides(&cfg, fs, targetFlags); err != nil { return err } - if isBlacksmithProvider(cfg.Provider) { - return a.blacksmithStop(ctx, cfg, fs.Arg(0)) - } - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { - return err - } else if ok { - if lease, err := coord.GetLease(ctx, fs.Arg(0)); 
err == nil { - _, target, leaseID := leaseToServerTarget(lease, cfg) - a.writeActionsHydrationStopBestEffort(ctx, target, leaseID) - } else { - fmt.Fprintf(a.Stderr, "warning: could not inspect lease before release: %v\n", err) - } - released, err := coord.ReleaseLease(ctx, fs.Arg(0), true) - if err != nil { - return err - } - removeLeaseClaim(released.ID) - fmt.Fprintf(a.Stderr, "released lease=%s server=%s\n", released.ID, leaseDisplayID(released)) - return nil - } - server, target, leaseID, err := a.findLease(ctx, cfg, fs.Arg(0)) + backend, err := loadBackend(cfg, runtimeForApp(a)) if err != nil { return err } - a.writeActionsHydrationStopBestEffort(ctx, target, leaseID) - fmt.Fprintf(a.Stderr, "deleting lease=%s server=%s name=%s\n", leaseID, server.DisplayID(), server.Name) - if err := deleteServer(ctx, cfg, server); err != nil { + if delegated, ok := backend.(DelegatedRunBackend); ok { + return delegated.Stop(ctx, StopRequest{Options: leaseOptionsFromConfig(cfg), ID: fs.Arg(0)}) + } + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + return exit(2, "provider=%s does not support stop", backend.Spec().Name) + } + lease, err := sshBackend.Resolve(ctx, ResolveRequest{Options: leaseOptionsFromConfig(cfg), ID: fs.Arg(0)}) + if err != nil { + if backendCoordinator(backend) != nil { + fmt.Fprintf(a.Stderr, "warning: could not inspect lease before release: %v\n", err) + lease = LeaseTarget{LeaseID: fs.Arg(0)} + } else { + return err + } + } + if lease.SSH.Host != "" { + a.writeActionsHydrationStopBestEffort(ctx, lease.SSH, lease.LeaseID) + } + if err := sshBackend.ReleaseLease(ctx, ReleaseLeaseRequest{Lease: lease, Force: true}); err != nil { return err } - removeLeaseClaim(leaseID) + if backendCoordinator(backend) != nil { + fmt.Fprintf(a.Stderr, "released lease=%s server=%s\n", lease.LeaseID, lease.Server.DisplayID()) + return nil + } + if isStaticProvider(cfg.Provider) || lease.Server.Provider == staticProvider { + fmt.Fprintf(a.Stderr, "released static 
lease=%s host=%s\n", lease.LeaseID, lease.SSH.Host) + return nil + } + fmt.Fprintf(a.Stderr, "deleted lease=%s server=%s name=%s\n", lease.LeaseID, lease.Server.DisplayID(), lease.Server.Name) return nil } diff --git a/internal/cli/static.go b/internal/cli/static.go index 9546ec4..fa17ec7 100644 --- a/internal/cli/static.go +++ b/internal/cli/static.go @@ -1,9 +1,7 @@ package cli import ( - "context" "flag" - "fmt" "strings" "time" ) @@ -53,30 +51,6 @@ func applyTargetFlagOverrides(cfg *Config, fs *flag.FlagSet, values targetFlagVa return validateTargetConfig(*cfg) } -func (a App) acquireStatic(ctx context.Context, cfg Config, keep bool) (Server, SSHTarget, string, error) { - server, target, leaseID, err := staticLease(cfg) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - fmt.Fprintf(a.Stderr, "using static target lease=%s slug=%s target=%s windows_mode=%s host=%s keep=%v\n", leaseID, serverSlug(server), cfg.TargetOS, cfg.WindowsMode, target.Host, keep) - if err := waitForSSH(ctx, &target, a.Stderr); err != nil { - return Server{}, SSHTarget{}, "", err - } - server.Labels["state"] = "ready" - return server, target, leaseID, nil -} - -func (a App) findStaticLease(_ context.Context, cfg Config, id string) (Server, SSHTarget, string, error) { - server, target, leaseID, err := staticLease(cfg) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - if id == "" || id == leaseID || id == server.Name || id == serverSlug(server) || id == cfg.Static.Host { - return server, target, leaseID, nil - } - return Server{}, SSHTarget{}, "", exit(4, "static lease not found: %s", id) -} - func staticLease(cfg Config) (Server, SSHTarget, string, error) { if cfg.Static.Host == "" { return Server{}, SSHTarget{}, "", exit(2, "provider=%s requires static.host or CRABBOX_STATIC_HOST", cfg.Provider) diff --git a/internal/cli/status.go b/internal/cli/status.go index a148b58..b2ba9f5 100644 --- a/internal/cli/status.go +++ b/internal/cli/status.go @@ -15,6 +15,7 @@ func 
(a App) status(ctx context.Context, args []string) error { wait := fs.Bool("wait", false, "wait until ready") waitTimeout := fs.Duration("wait-timeout", 5*time.Minute, "maximum wait duration") jsonOut := fs.Bool("json", false, "print JSON") + providerFlags := registerProviderFlags(fs, defaults) targetFlags := registerTargetFlags(fs, defaults) networkFlags := registerNetworkModeFlag(fs, defaults) if err := parseFlags(fs, args); err != nil { @@ -25,21 +26,42 @@ func (a App) status(ctx context.Context, args []string) error { if err != nil { return err } + if err := applyProviderFlags(&cfg, fs, providerFlags); err != nil { + return err + } if err := requireLeaseID(*id, "crabbox status --id ", cfg); err != nil { return err } - if isBlacksmithProvider(cfg.Provider) { - return a.blacksmithStatus(ctx, cfg, *id, *wait, *waitTimeout, *jsonOut) + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { + return err } + delegated, isDelegated := backend.(DelegatedRunBackend) + sshBackend, isSSH := backend.(SSHLeaseBackend) deadline := time.Now().Add(*waitTimeout) for { - state, err := a.leaseStatus(ctx, cfg, *id) + var state statusView + var err error + if isDelegated { + state, err = delegated.Status(ctx, StatusRequest{Options: leaseOptionsFromConfig(cfg), ID: *id, Wait: *wait, WaitTimeout: *waitTimeout}) + } else if isSSH { + var lease LeaseTarget + lease, err = sshBackend.Resolve(ctx, ResolveRequest{Options: leaseOptionsFromConfig(cfg), ID: *id}) + if err == nil { + state, err = statusViewFromLeaseTarget(ctx, cfg, lease) + if err == nil && *wait { + _, touchErr := sshBackend.Touch(ctx, TouchRequest{Lease: lease, State: state.State, IdleTimeout: cfg.IdleTimeout}) + if touchErr != nil { + fmt.Fprintf(a.Stderr, "warning: touch failed for %s: %v\n", lease.LeaseID, touchErr) + } + } + } + } else { + state, err = a.leaseStatus(ctx, cfg, *id) + } if err != nil { return err } - if *wait { - a.touchLeaseBestEffort(ctx, cfg, *id, state.ID) - } if *jsonOut { if !*wait || 
state.Ready { return json.NewEncoder(a.Stdout).Encode(state) @@ -61,6 +83,48 @@ func (a App) status(ctx context.Context, args []string) error { } } +func statusViewFromLeaseTarget(ctx context.Context, cfg Config, lease LeaseTarget) (statusView, error) { + server := lease.Server + target := lease.SSH + hasHost := server.PublicNet.IPv4.IP != "" + resolved, err := resolveNetworkTarget(ctx, cfg, server, target) + if err != nil { + return statusView{}, err + } + target = resolved.Target + ready := hasHost && blank(server.Labels["state"], server.Status) != "provisioning" && probeSSHReady(ctx, &target, 4*time.Second) + meta := serverTailscaleMetadata(server) + var tailscale *TailscaleMetadata + if meta.Enabled { + tailscale = &meta + } + return statusView{ + ID: lease.LeaseID, + Slug: serverSlug(server), + Provider: blank(server.Provider, cfg.Provider), + TargetOS: blank(server.Labels["target"], cfg.TargetOS), + WindowsMode: blank(server.Labels["windows_mode"], cfg.WindowsMode), + State: blank(server.Labels["state"], server.Status), + ServerID: server.DisplayID(), + ServerType: server.ServerType.Name, + Host: server.PublicNet.IPv4.IP, + Network: resolved.Network, + Tailscale: tailscale, + SSHHost: target.Host, + SSHUser: target.User, + SSHPort: target.Port, + SSHFallbackPorts: target.FallbackPorts, + SSHKey: target.Key, + LastTouchedAt: blank(leaseLabelTimeDisplay(server.Labels["last_touched_at"]), server.Labels["last_touched_at"]), + IdleFor: idleForString(server.Labels["last_touched_at"], time.Now()), + IdleTimeout: leaseLabelDurationDisplay(server.Labels["idle_timeout_secs"], server.Labels["idle_timeout"]), + ExpiresAt: blank(leaseLabelTimeDisplay(server.Labels["expires_at"]), server.Labels["expires_at"]), + Labels: server.Labels, + HasHost: hasHost, + Ready: ready, + }, nil +} + type statusView struct { ID string `json:"id"` Slug string `json:"slug,omitempty"` @@ -88,102 +152,38 @@ type statusView struct { } func (a App) leaseStatus(ctx context.Context, cfg Config, id 
string) (statusView, error) { - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { - return statusView{}, err - } else if ok { - lease, err := coord.GetLease(ctx, id) - if err != nil { - return statusView{}, err - } - server, target, _ := leaseToServerTarget(lease, cfg) - resolved, err := resolveNetworkTarget(ctx, cfg, server, target) - if err != nil { - return statusView{}, err - } - target = resolved.Target - hasHost := lease.Host != "" - ready := lease.State == "active" && hasHost && probeSSHReady(ctx, &target, 4*time.Second) - return statusView{ - ID: lease.ID, - Slug: lease.Slug, - Provider: blank(lease.Provider, cfg.Provider), - TargetOS: blank(target.TargetOS, cfg.TargetOS), - WindowsMode: blank(target.WindowsMode, cfg.WindowsMode), - State: lease.State, - ServerID: leaseDisplayID(lease), - ServerType: lease.ServerType, - Host: lease.Host, - Network: resolved.Network, - Tailscale: lease.Tailscale, - SSHHost: target.Host, - SSHUser: target.User, - SSHPort: target.Port, - SSHFallbackPorts: target.FallbackPorts, - SSHKey: target.Key, - LastTouchedAt: lease.LastTouchedAt, - IdleFor: idleForString(lease.LastTouchedAt, time.Now()), - IdleTimeout: formatSecondsDuration(lease.IdleTimeoutSeconds), - ExpiresAt: lease.ExpiresAt, - Labels: map[string]string{"keep": fmt.Sprint(lease.Keep)}, - HasHost: hasHost, - Ready: ready, - }, nil - } - server, target, leaseID, err := a.findLease(ctx, cfg, id) + backend, err := loadBackend(cfg, runtimeForApp(a)) if err != nil { return statusView{}, err } - hasHost := server.PublicNet.IPv4.IP != "" - resolved, err := resolveNetworkTarget(ctx, cfg, server, target) + if delegated, ok := backend.(DelegatedRunBackend); ok { + return delegated.Status(ctx, StatusRequest{Options: leaseOptionsFromConfig(cfg), ID: id}) + } + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + return statusView{}, exit(2, "provider=%s does not support status", backend.Spec().Name) + } + lease, err := sshBackend.Resolve(ctx, 
ResolveRequest{Options: leaseOptionsFromConfig(cfg), ID: id}) if err != nil { return statusView{}, err } - target = resolved.Target - ready := hasHost && server.Labels["state"] != "provisioning" && probeSSHReady(ctx, &target, 4*time.Second) - meta := serverTailscaleMetadata(server) - var tailscale *TailscaleMetadata - if meta.Enabled { - tailscale = &meta - } - return statusView{ - ID: leaseID, - Slug: serverSlug(server), - Provider: blank(server.Provider, cfg.Provider), - TargetOS: blank(server.Labels["target"], cfg.TargetOS), - WindowsMode: blank(server.Labels["windows_mode"], cfg.WindowsMode), - State: blank(server.Labels["state"], server.Status), - ServerID: server.DisplayID(), - ServerType: server.ServerType.Name, - Host: server.PublicNet.IPv4.IP, - Network: resolved.Network, - Tailscale: tailscale, - SSHHost: target.Host, - SSHUser: target.User, - SSHPort: target.Port, - SSHFallbackPorts: target.FallbackPorts, - SSHKey: target.Key, - LastTouchedAt: blank(leaseLabelTimeDisplay(server.Labels["last_touched_at"]), server.Labels["last_touched_at"]), - IdleFor: idleForString(server.Labels["last_touched_at"], time.Now()), - IdleTimeout: leaseLabelDurationDisplay(server.Labels["idle_timeout_secs"], server.Labels["idle_timeout"]), - ExpiresAt: blank(leaseLabelTimeDisplay(server.Labels["expires_at"]), server.Labels["expires_at"]), - Labels: server.Labels, - HasHost: hasHost, - Ready: ready, - }, nil + return statusViewFromLeaseTarget(ctx, cfg, lease) } func (a App) resolveLeaseTarget(ctx context.Context, cfg Config, id string) (Server, SSHTarget, string, error) { - if coord, ok, err := newTargetCoordinatorClient(cfg); err != nil { + backend, err := loadBackend(cfg, runtimeForApp(a)) + if err != nil { return Server{}, SSHTarget{}, "", err - } else if ok { - lease, err := coord.GetLease(ctx, id) - if err != nil { - return Server{}, SSHTarget{}, "", err - } - server, target, leaseID := leaseToServerTarget(lease, cfg) - return server, target, leaseID, nil } - return 
a.findLease(ctx, cfg, id) + sshBackend, ok := backend.(SSHLeaseBackend) + if !ok { + return Server{}, SSHTarget{}, "", exit(2, "provider=%s does not expose an SSH target", backend.Spec().Name) + } + lease, err := sshBackend.Resolve(ctx, ResolveRequest{Options: leaseOptionsFromConfig(cfg), ID: id}) + if err != nil { + return Server{}, SSHTarget{}, "", err + } + return lease.Server, lease.SSH, lease.LeaseID, nil } func idleForString(value string, now time.Time) string { diff --git a/internal/cli/target.go b/internal/cli/target.go index ca5ebc3..6199a01 100644 --- a/internal/cli/target.go +++ b/internal/cli/target.go @@ -100,14 +100,12 @@ func validateTargetConfig(cfg Config) error { } func validateProviderTarget(cfg Config) error { - if isStaticProvider(cfg.Provider) || isBlacksmithProvider(cfg.Provider) { - return nil + provider, err := ProviderFor(cfg.Provider) + if err != nil { + return err } - if cfg.Provider == "aws" && cfg.TargetOS == targetWindows && cfg.WindowsMode == windowsModeNormal { - return nil - } - if cfg.Provider == "aws" && cfg.TargetOS == targetWindows && cfg.WindowsMode == windowsModeWSL2 { - return nil + if !providerSpecSupportsTarget(provider.Spec(), cfg.TargetOS, cfg.WindowsMode) { + return exit(2, "%s", unsupportedManagedTargetMessage(provider.Name(), cfg.TargetOS)) } if cfg.Provider == "aws" && cfg.TargetOS == targetMacOS { if cfg.AWSMacHostID == "" && cfg.Coordinator == "" { @@ -118,12 +116,22 @@ func validateProviderTarget(cfg Config) error { } return nil } - if cfg.TargetOS != targetLinux { - return exit(2, "%s", unsupportedManagedTargetMessage(cfg.Provider, cfg.TargetOS)) - } return nil } +func providerSpecSupportsTarget(spec ProviderSpec, targetOS, windowsMode string) bool { + for _, target := range spec.Targets { + if target.OS != targetOS { + continue + } + if targetOS == targetWindows && target.WindowsMode != "" && target.WindowsMode != windowsMode { + continue + } + return true + } + return false +} + func 
unsupportedManagedTargetMessage(provider, target string) string { switch target { case targetWindows: diff --git a/internal/providers/all/all.go b/internal/providers/all/all.go new file mode 100644 index 0000000..f269571 --- /dev/null +++ b/internal/providers/all/all.go @@ -0,0 +1,8 @@ +package all + +import ( + _ "github.com/openclaw/crabbox/internal/providers/aws" + _ "github.com/openclaw/crabbox/internal/providers/blacksmith" + _ "github.com/openclaw/crabbox/internal/providers/hetzner" + _ "github.com/openclaw/crabbox/internal/providers/ssh" +) diff --git a/internal/providers/aws/provider.go b/internal/providers/aws/provider.go new file mode 100644 index 0000000..309e9b4 --- /dev/null +++ b/internal/providers/aws/provider.go @@ -0,0 +1,37 @@ +package aws + +import ( + "flag" + + "github.com/openclaw/crabbox/internal/cli" +) + +func init() { + cli.RegisterProvider(Provider{}) +} + +type Provider struct{} + +func (Provider) Name() string { return "aws" } +func (Provider) Aliases() []string { return nil } +func (Provider) Spec() cli.ProviderSpec { + return cli.ProviderSpec{ + Name: "aws", + Kind: cli.ProviderKindSSHLease, + Targets: []cli.TargetSpec{ + {OS: "linux"}, + {OS: "windows", WindowsMode: "normal"}, + {OS: "windows", WindowsMode: "wsl2"}, + {OS: "macos"}, + }, + Features: cli.FeatureSet{cli.FeatureSSH, cli.FeatureCrabboxSync, cli.FeatureCleanup, cli.FeatureDesktop, cli.FeatureBrowser, cli.FeatureCode}, + Coordinator: cli.CoordinatorSupported, + } +} +func (Provider) RegisterFlags(*flag.FlagSet, cli.Config) any { return cli.NoProviderFlags() } +func (Provider) ApplyFlags(*cli.Config, *flag.FlagSet, any) error { + return nil +} +func (p Provider) Configure(cfg cli.Config, rt cli.Runtime) (cli.Backend, error) { + return cli.NewAWSLeaseBackend(p.Spec(), cfg, rt), nil +} diff --git a/internal/providers/blacksmith/provider.go b/internal/providers/blacksmith/provider.go new file mode 100644 index 0000000..13660c0 --- /dev/null +++ 
b/internal/providers/blacksmith/provider.go @@ -0,0 +1,36 @@ +package blacksmith + +import ( + "flag" + + "github.com/openclaw/crabbox/internal/cli" +) + +func init() { + cli.RegisterProvider(Provider{}) +} + +type Provider struct{} + +func (Provider) Name() string { return "blacksmith-testbox" } +func (Provider) Aliases() []string { + return []string{"blacksmith"} +} +func (Provider) Spec() cli.ProviderSpec { + return cli.ProviderSpec{ + Name: "blacksmith-testbox", + Kind: cli.ProviderKindDelegatedRun, + Targets: []cli.TargetSpec{{OS: "linux"}}, + Features: nil, + Coordinator: cli.CoordinatorNever, + } +} +func (Provider) RegisterFlags(fs *flag.FlagSet, defaults cli.Config) any { + return cli.RegisterBlacksmithProviderFlags(fs, defaults) +} +func (Provider) ApplyFlags(cfg *cli.Config, fs *flag.FlagSet, values any) error { + return cli.ApplyBlacksmithProviderFlags(cfg, fs, values) +} +func (p Provider) Configure(cfg cli.Config, rt cli.Runtime) (cli.Backend, error) { + return cli.NewBlacksmithBackend(p.Spec(), cfg, rt), nil +} diff --git a/internal/providers/hetzner/provider.go b/internal/providers/hetzner/provider.go new file mode 100644 index 0000000..3a197c5 --- /dev/null +++ b/internal/providers/hetzner/provider.go @@ -0,0 +1,32 @@ +package hetzner + +import ( + "flag" + + "github.com/openclaw/crabbox/internal/cli" +) + +func init() { + cli.RegisterProvider(Provider{}) +} + +type Provider struct{} + +func (Provider) Name() string { return "hetzner" } +func (Provider) Aliases() []string { return nil } +func (Provider) Spec() cli.ProviderSpec { + return cli.ProviderSpec{ + Name: "hetzner", + Kind: cli.ProviderKindSSHLease, + Targets: []cli.TargetSpec{{OS: "linux"}}, + Features: cli.FeatureSet{cli.FeatureSSH, cli.FeatureCrabboxSync, cli.FeatureCleanup, cli.FeatureDesktop, cli.FeatureBrowser, cli.FeatureCode, cli.FeatureTailscale}, + Coordinator: cli.CoordinatorSupported, + } +} +func (Provider) RegisterFlags(*flag.FlagSet, cli.Config) any { return 
cli.NoProviderFlags() } +func (Provider) ApplyFlags(*cli.Config, *flag.FlagSet, any) error { + return nil +} +func (p Provider) Configure(cfg cli.Config, rt cli.Runtime) (cli.Backend, error) { + return cli.NewHetznerLeaseBackend(p.Spec(), cfg, rt), nil +} diff --git a/internal/providers/ssh/provider.go b/internal/providers/ssh/provider.go new file mode 100644 index 0000000..aee860f --- /dev/null +++ b/internal/providers/ssh/provider.go @@ -0,0 +1,39 @@ +package ssh + +import ( + "flag" + + "github.com/openclaw/crabbox/internal/cli" +) + +func init() { + cli.RegisterProvider(Provider{}) +} + +type Provider struct{} + +func (Provider) Name() string { return "ssh" } +func (Provider) Aliases() []string { + return []string{"static", "static-ssh"} +} +func (Provider) Spec() cli.ProviderSpec { + return cli.ProviderSpec{ + Name: "ssh", + Kind: cli.ProviderKindSSHLease, + Targets: []cli.TargetSpec{ + {OS: "linux"}, + {OS: "windows", WindowsMode: "normal"}, + {OS: "windows", WindowsMode: "wsl2"}, + {OS: "macos"}, + }, + Features: cli.FeatureSet{cli.FeatureSSH, cli.FeatureCrabboxSync, cli.FeatureDesktop, cli.FeatureBrowser, cli.FeatureCode}, + Coordinator: cli.CoordinatorNever, + } +} +func (Provider) RegisterFlags(*flag.FlagSet, cli.Config) any { return cli.NoProviderFlags() } +func (Provider) ApplyFlags(*cli.Config, *flag.FlagSet, any) error { + return nil +} +func (p Provider) Configure(cfg cli.Config, rt cli.Runtime) (cli.Backend, error) { + return cli.NewStaticSSHLeaseBackend(p.Spec(), cfg, rt), nil +}