fix(blacksmith): explain queued outage timeouts

This commit is contained in:
Vincent Koc 2026-05-06 15:21:25 -07:00
parent 3d7b3ebfe6
commit 9200bdb060
No known key found for this signature in database
5 changed files with 45 additions and 2 deletions

View File

@ -90,6 +90,12 @@ The wrapper is deliberately thin for warmup, run, and stop. `crabbox list` and
views so rendering stays core-owned across providers. Status currently reads
`blacksmith testbox list --all` to build that view.
If `blacksmith testbox list --all` and `crabbox status --provider
blacksmith-testbox --id <tbx_id>` both work but new warmups remain `queued` with no
IP, treat it as Blacksmith-side service, queue, org-limit, or billing pressure
rather than a Crabbox provisioning bug. Stop the queued IDs you created and switch to
another provider until the Blacksmith account or service recovers.
`crabbox list --provider blacksmith-testbox --json` parses the Blacksmith table
output into compatibility JSON rows with the fields Crabbox can see. That parser is a
compatibility layer, not a Blacksmith API contract. If the Blacksmith CLI adds

View File

@ -77,6 +77,11 @@ blacksmith testbox list --all
blacksmith testbox stop ...
```
If list/status calls work but new warmups sit `queued` with no IP, the
Blacksmith service or organization is accepting requests but not assigning
capacity. Stop the queued IDs you created and use AWS, Hetzner, Static SSH, or
Daytona until the Blacksmith service, billing, or org limits are healthy again.
Crabbox stores a per-Testbox SSH key locally, claims the Testbox for the current
repo, maps IDs to friendly slugs, and prints a normal Crabbox timing summary.

View File

@ -101,7 +101,8 @@ Checks:
```sh
bin/crabbox list --json
bin/crabbox usage --scope all
bin/crabbox warmup --provider aws --class beast --market on-demand --timing-json
CRABBOX_CAPACITY_REGIONS=eu-west-1,eu-west-2,eu-central-1,us-east-1,us-west-2 \
bin/crabbox warmup --provider aws --class standard --market on-demand --timing-json
```
Fixes:

View File

@ -185,16 +185,18 @@ func (b *blacksmithBackend) Status(ctx context.Context, req StatusRequest) (stat
return statusView{}, err
}
deadline := b.rt.Clock.Now().Add(req.WaitTimeout)
var lastState statusView
for {
state, err := b.blacksmithStatusView(ctx, leaseID)
if err != nil {
return statusView{}, err
}
lastState = state
if !req.Wait || state.Ready {
return state, nil
}
if b.rt.Clock.Now().After(deadline) {
return statusView{}, exit(5, "timed out waiting for %s to become ready", req.ID)
return statusView{}, exit(5, "%s", blacksmithWaitTimeoutMessage(req.ID, lastState.State))
}
time.Sleep(5 * time.Second)
}
@ -387,6 +389,17 @@ func blacksmithItemToServer(item blacksmithListItem) Server {
return server
}
// blacksmithWaitTimeoutMessage builds the error text reported when waiting for
// a Blacksmith Testbox to become ready times out.
//
// identifier is echoed verbatim in the message. state is the last observed
// state; surrounding whitespace is trimmed before it is inspected. A state
// equal to "queued" under case folding adds a hint that the Blacksmith queue
// may be stalled, any other non-empty state is appended as-is, and an empty
// state yields the bare timeout message.
func blacksmithWaitTimeoutMessage(identifier, state string) string {
	lastState := strings.TrimSpace(state)
	switch {
	case lastState == "":
		// Nothing was observed, so there is no state worth reporting.
		return fmt.Sprintf("timed out waiting for %s to become ready", identifier)
	case strings.EqualFold(lastState, "queued"):
		// Still queued at the deadline: add the remediation hint.
		return fmt.Sprintf("timed out waiting for %s to become ready (last state queued; Blacksmith queue may be stalled, so stop queued ids you created or use another provider)", identifier)
	default:
		return fmt.Sprintf("timed out waiting for %s to become ready (last state %s)", identifier, lastState)
	}
}
// statusView is a package-local alias for core.StatusView.
type statusView = core.StatusView
func rejectDelegatedSyncOptions(provider string, req RunRequest) error {

View File

@ -315,6 +315,24 @@ func TestBlacksmithBackendUsesInjectedCommandRunnerForListAndStatus(t *testing.T
}
}
func TestBlacksmithStatusWaitTimeoutMentionsQueuedState(t *testing.T) {
runner := &blacksmithFuncRunner{fn: func(LocalCommandRequest) (LocalCommandResult, error) {
return LocalCommandResult{
Stdout: "tbx_123 queued openclaw .github/workflows/testbox.yml test main 2026-05-06T00:00:00Z\n",
}, nil
}}
backend := newTestBlacksmithBackend(baseConfig(), runner)
_, err := backend.Status(context.Background(), StatusRequest{ID: "tbx_123", Wait: true, WaitTimeout: -time.Second})
if err == nil {
t.Fatal("expected queued timeout")
}
for _, want := range []string{"last state queued", "Blacksmith queue may be stalled"} {
if !strings.Contains(err.Error(), want) {
t.Fatalf("error=%q, want %q", err.Error(), want)
}
}
}
func TestBlacksmithBackendListJSONKeepsParsedTableShape(t *testing.T) {
runner := &blacksmithFuncRunner{fn: func(LocalCommandRequest) (LocalCommandResult, error) {
return LocalCommandResult{