fix(blacksmith): explain queued outage timeouts
This commit is contained in:
parent
3d7b3ebfe6
commit
9200bdb060
@ -90,6 +90,12 @@ The wrapper is deliberately thin for warmup, run, and stop. `crabbox list` and
|
||||
views so rendering stays core-owned across providers. Status currently reads
|
||||
`blacksmith testbox list --all` to build that view.
|
||||
|
||||
If `blacksmith testbox list --all` and `crabbox status --provider
|
||||
blacksmith-testbox --id <tbx_id>` work but new warmups remain `queued` with no
|
||||
IP, treat it as Blacksmith service, queue, org-limit, or billing pressure
|
||||
instead of a Crabbox provisioning bug. Stop queued IDs you created and switch to
|
||||
another provider until the Blacksmith account or service recovers.
|
||||
|
||||
`crabbox list --provider blacksmith-testbox --json` parses the Blacksmith table
|
||||
output into compatibility JSON rows with the fields Crabbox can see. That parser is a
|
||||
compatibility layer, not a Blacksmith API contract. If the Blacksmith CLI adds
|
||||
|
||||
@ -77,6 +77,11 @@ blacksmith testbox list --all
|
||||
blacksmith testbox stop ...
|
||||
```
|
||||
|
||||
If list/status calls work but new warmups sit `queued` with no IP, the
|
||||
Blacksmith service or organization is accepting requests but not assigning
|
||||
capacity. Stop queued IDs you created and use AWS, Hetzner, Static SSH, or
|
||||
Daytona until Blacksmith service, billing, or org limits are healthy again.
|
||||
|
||||
Crabbox stores a per-Testbox SSH key locally, claims the Testbox for the current
|
||||
repo, maps IDs to friendly slugs, and prints a normal Crabbox timing summary.
|
||||
|
||||
|
||||
@ -101,7 +101,8 @@ Checks:
|
||||
```sh
|
||||
bin/crabbox list --json
|
||||
bin/crabbox usage --scope all
|
||||
bin/crabbox warmup --provider aws --class beast --market on-demand --timing-json
|
||||
CRABBOX_CAPACITY_REGIONS=eu-west-1,eu-west-2,eu-central-1,us-east-1,us-west-2 \
|
||||
bin/crabbox warmup --provider aws --class standard --market on-demand --timing-json
|
||||
```
|
||||
|
||||
Fixes:
|
||||
|
||||
@ -185,16 +185,18 @@ func (b *blacksmithBackend) Status(ctx context.Context, req StatusRequest) (stat
|
||||
return statusView{}, err
|
||||
}
|
||||
deadline := b.rt.Clock.Now().Add(req.WaitTimeout)
|
||||
var lastState statusView
|
||||
for {
|
||||
state, err := b.blacksmithStatusView(ctx, leaseID)
|
||||
if err != nil {
|
||||
return statusView{}, err
|
||||
}
|
||||
lastState = state
|
||||
if !req.Wait || state.Ready {
|
||||
return state, nil
|
||||
}
|
||||
if b.rt.Clock.Now().After(deadline) {
|
||||
return statusView{}, exit(5, "timed out waiting for %s to become ready", req.ID)
|
||||
return statusView{}, exit(5, "%s", blacksmithWaitTimeoutMessage(req.ID, lastState.State))
|
||||
}
|
||||
time.Sleep(5 * time.Second)
|
||||
}
|
||||
@ -387,6 +389,17 @@ func blacksmithItemToServer(item blacksmithListItem) Server {
|
||||
return server
|
||||
}
|
||||
|
||||
// blacksmithWaitTimeoutMessage builds the text reported when waiting for a
// Blacksmith testbox to become ready times out.
//
// identifier is the ID echoed back to the user. state is the last state that
// was observed; surrounding whitespace is ignored. A last state of "queued"
// (compared case-insensitively) adds a hint that the Blacksmith queue may be
// stalled, any other non-empty state is appended verbatim, and an empty state
// yields the plain timeout message.
func blacksmithWaitTimeoutMessage(identifier, state string) string {
	const base = "timed out waiting for %s to become ready"

	switch last := strings.TrimSpace(state); {
	case strings.EqualFold(last, "queued"):
		return fmt.Sprintf(base+" (last state queued; Blacksmith queue may be stalled, so stop queued ids you created or use another provider)", identifier)
	case last == "":
		return fmt.Sprintf(base, identifier)
	default:
		return fmt.Sprintf(base+" (last state %s)", identifier, last)
	}
}
|
||||
|
||||
type statusView = core.StatusView
|
||||
|
||||
func rejectDelegatedSyncOptions(provider string, req RunRequest) error {
|
||||
|
||||
@ -315,6 +315,24 @@ func TestBlacksmithBackendUsesInjectedCommandRunnerForListAndStatus(t *testing.T
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacksmithStatusWaitTimeoutMentionsQueuedState(t *testing.T) {
|
||||
runner := &blacksmithFuncRunner{fn: func(LocalCommandRequest) (LocalCommandResult, error) {
|
||||
return LocalCommandResult{
|
||||
Stdout: "tbx_123 queued openclaw .github/workflows/testbox.yml test main 2026-05-06T00:00:00Z\n",
|
||||
}, nil
|
||||
}}
|
||||
backend := newTestBlacksmithBackend(baseConfig(), runner)
|
||||
_, err := backend.Status(context.Background(), StatusRequest{ID: "tbx_123", Wait: true, WaitTimeout: -time.Second})
|
||||
if err == nil {
|
||||
t.Fatal("expected queued timeout")
|
||||
}
|
||||
for _, want := range []string{"last state queued", "Blacksmith queue may be stalled"} {
|
||||
if !strings.Contains(err.Error(), want) {
|
||||
t.Fatalf("error=%q, want %q", err.Error(), want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacksmithBackendListJSONKeepsParsedTableShape(t *testing.T) {
|
||||
runner := &blacksmithFuncRunner{fn: func(LocalCommandRequest) (LocalCommandResult, error) {
|
||||
return LocalCommandResult{
|
||||
|
||||
Loading…
Reference in New Issue
Block a user