fix(coordinator): keep capacity requests sparse

This commit is contained in:
Vincent Koc 2026-05-06 16:05:01 -07:00
parent 0e19455e57
commit eb1e92f680
No known key found for this signature in database
4 changed files with 116 additions and 19 deletions

View File

@ -29,6 +29,7 @@
### Changed
- Changed AWS capacity fallback to route configured `CRABBOX_CAPACITY_REGIONS` across both brokered and direct AWS launches, with the deployed coordinator defaulting to a wider multi-region pool for better headroom.
- Changed coordinator lease requests to omit the default capacity block, preserving mixed-version broker compatibility while still sending explicit market, strategy, fallback, multi-region, availability-zone, or hint opt-out settings.
- Changed coordinator-backed CLI lease output to print broker capacity hints when AWS routing, quota, Spot fallback, or configured high-pressure classes are involved.
- Changed the portal lease table to merge external Blacksmith Testbox runners into the main grid as muted, disabled rows instead of rendering a separate external-runners table.
- Refactored built-in provider backend implementations into `internal/providers/<name>` packages while keeping command orchestration and rendering core-owned.

View File

@ -54,6 +54,13 @@ pressure, Spot-to-On-Demand fallback, and high-pressure class warnings. Set
`CRABBOX_CAPACITY_LARGE_CLASSES=beast,large` when an installation wants warning
hints for a different set of classes.
These fields are wire-compatible with mixed CLI/broker versions. Upgraded
brokers add optional response fields that older clients ignore. Upgraded
clients keep the lease request sparse: fields that match the broker defaults
are omitted, and the capacity block is sent only when an operator configures a
non-default market, strategy, or fallback, a multi-region pool, pinned
availability zones, or `capacity.hints: false`.
Crabbox tries ordered instance candidates for the requested class. Explicit
`--type` is exact: if EC2 rejects it, Crabbox fails clearly instead of silently
choosing another type.

View File

@ -349,7 +349,26 @@ func (c *CoordinatorClient) CreateLease(ctx context.Context, cfg Config, publicK
if slug == "" {
slug = newLeaseSlug(leaseID)
}
err := c.do(ctx, http.MethodPost, "/v1/leases", map[string]any{
capacity := map[string]any{}
if cfg.Capacity.Market != "" && cfg.Capacity.Market != "spot" {
capacity["market"] = cfg.Capacity.Market
}
if cfg.Capacity.Strategy != "" && cfg.Capacity.Strategy != "most-available" {
capacity["strategy"] = cfg.Capacity.Strategy
}
if cfg.Capacity.Fallback != "" && cfg.Capacity.Fallback != "on-demand-after-120s" {
capacity["fallback"] = cfg.Capacity.Fallback
}
if len(cfg.Capacity.Regions) > 0 {
capacity["regions"] = cfg.Capacity.Regions
}
if len(cfg.Capacity.AvailabilityZones) > 0 {
capacity["availabilityZones"] = cfg.Capacity.AvailabilityZones
}
if !cfg.Capacity.Hints {
capacity["hints"] = false
}
req := map[string]any{
"leaseID": leaseID,
"slug": slug,
"profile": cfg.Profile,
@ -377,24 +396,20 @@ func (c *CoordinatorClient) CreateLease(ctx context.Context, cfg Config, publicK
"awsRootGB": cfg.AWSRootGB,
"awsSSHCIDRs": cfg.AWSSSHCIDRs,
"awsMacHostID": cfg.AWSMacHostID,
"capacity": map[string]any{
"market": cfg.Capacity.Market,
"strategy": cfg.Capacity.Strategy,
"fallback": cfg.Capacity.Fallback,
"regions": cfg.Capacity.Regions,
"availabilityZones": cfg.Capacity.AvailabilityZones,
"hints": cfg.Capacity.Hints,
},
"sshUser": cfg.SSHUser,
"sshPort": cfg.SSHPort,
"sshFallbackPorts": cfg.SSHFallbackPorts,
"providerKey": cfg.ProviderKey,
"workRoot": cfg.WorkRoot,
"ttlSeconds": int(cfg.TTL.Seconds()),
"idleTimeoutSeconds": int(cfg.IdleTimeout.Seconds()),
"keep": keep,
"sshPublicKey": publicKey,
}, &res)
"sshUser": cfg.SSHUser,
"sshPort": cfg.SSHPort,
"sshFallbackPorts": cfg.SSHFallbackPorts,
"providerKey": cfg.ProviderKey,
"workRoot": cfg.WorkRoot,
"ttlSeconds": int(cfg.TTL.Seconds()),
"idleTimeoutSeconds": int(cfg.IdleTimeout.Seconds()),
"keep": keep,
"sshPublicKey": publicKey,
}
if len(capacity) > 0 {
req["capacity"] = capacity
}
err := c.do(ctx, http.MethodPost, "/v1/leases", req, &res)
return res.Lease, err
}

View File

@ -9,6 +9,7 @@ import (
"net/http/httptest"
"os"
"path/filepath"
"reflect"
"strings"
"testing"
"time"
@ -380,6 +381,7 @@ func TestCoordinatorCreateLeaseSendsAWSSSHCIDRs(t *testing.T) {
AWSSSHCIDRs []string `json:"awsSSHCIDRs"`
SSHFallbackPorts []string `json:"sshFallbackPorts"`
ServerTypeExplicit bool `json:"serverTypeExplicit"`
Capacity map[string]any
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPost || r.URL.Path != "/v1/leases" {
@ -400,6 +402,12 @@ func TestCoordinatorCreateLeaseSendsAWSSSHCIDRs(t *testing.T) {
ServerTypeExplicit: true,
AWSSSHCIDRs: []string{"198.51.100.7/32"},
SSHFallbackPorts: []string{"22", "2022"},
Capacity: CapacityConfig{
Market: "spot",
Strategy: "most-available",
Fallback: "on-demand-after-120s",
Hints: true,
},
}, "ssh-ed25519 test", false, "cbx_123", "blue-crab")
if err != nil {
t.Fatal(err)
@ -413,6 +421,61 @@ func TestCoordinatorCreateLeaseSendsAWSSSHCIDRs(t *testing.T) {
if !body.ServerTypeExplicit {
t.Fatal("serverTypeExplicit=false, want true")
}
if body.Capacity != nil {
t.Fatalf("default capacity fields should be omitted for mixed-version brokers: %#v", body.Capacity)
}
}
// TestCoordinatorCreateLeaseSendsConfiguredCapacityExtensions checks that
// CreateLease sends a capacity block containing the explicitly configured
// regions, availability zones, and hints opt-out, while leaving the default
// market, strategy, and fallback values out of that block.
func TestCoordinatorCreateLeaseSendsConfiguredCapacityExtensions(t *testing.T) {
	var body struct {
		Capacity map[string]any
	}
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a server goroutine, where t.Fatal/t.FailNow must
		// not be called. Record the failure with t.Errorf and answer with an
		// error status instead.
		if r.Method != http.MethodPost || r.URL.Path != "/v1/leases" {
			t.Errorf("unexpected request %s %s", r.Method, r.URL.Path)
			http.Error(w, "unexpected request", http.StatusNotFound)
			return
		}
		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
			t.Errorf("decode lease request: %v", err)
			http.Error(w, "invalid request body", http.StatusBadRequest)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"lease":{"id":"cbx_123","provider":"aws","state":"active","host":"192.0.2.10"}}`))
	}))
	defer server.Close()

	client := CoordinatorClient{BaseURL: server.URL, Client: server.Client()}
	_, err := client.CreateLease(context.Background(), Config{
		Provider: "aws",
		Capacity: CapacityConfig{
			Market:            "spot",
			Strategy:          "most-available",
			Fallback:          "on-demand-after-120s",
			Regions:           []string{"eu-west-1", "eu-west-2"},
			AvailabilityZones: []string{"eu-west-1a"},
			Hints:             false,
		},
	}, "ssh-ed25519 test", false, "cbx_123", "blue-crab")
	if err != nil {
		t.Fatal(err)
	}

	if got := stringSliceFromJSON(body.Capacity["regions"]); !reflect.DeepEqual(got, []string{"eu-west-1", "eu-west-2"}) {
		t.Fatalf("capacity.regions=%v", got)
	}
	if got := stringSliceFromJSON(body.Capacity["availabilityZones"]); !reflect.DeepEqual(got, []string{"eu-west-1a"}) {
		t.Fatalf("capacity.availabilityZones=%v", got)
	}
	if got, ok := body.Capacity["hints"].(bool); !ok || got {
		t.Fatalf("capacity.hints=%#v, want false", body.Capacity["hints"])
	}
	// The configured market, strategy, and fallback equal the defaults, so the
	// sparse request must not carry them even though the block itself is sent.
	for _, key := range []string{"market", "strategy", "fallback"} {
		if value, present := body.Capacity[key]; present {
			t.Fatalf("capacity.%s=%#v, want default value omitted", key, value)
		}
	}
}
// TestCoordinatorLeaseDecodesLegacyCapacityResponse decodes a lease payload
// that carries none of the newer capacity fields and checks that Market,
// ProvisioningAttempts, and CapacityHints are all left empty.
func TestCoordinatorLeaseDecodesLegacyCapacityResponse(t *testing.T) {
	const legacyPayload = `{"id":"cbx_123","provider":"aws","serverType":"c7a.8xlarge"}`

	var lease CoordinatorLease
	err := json.Unmarshal([]byte(legacyPayload), &lease)
	if err != nil {
		t.Fatal(err)
	}

	marketUnset := lease.Market == ""
	noAttempts := len(lease.ProvisioningAttempts) == 0
	noHints := len(lease.CapacityHints) == 0
	if !(marketUnset && noAttempts && noHints) {
		t.Fatalf("new capacity fields should be optional: %#v", lease)
	}
}
func TestCoordinatorLeaseDecodesProvisioningAttempts(t *testing.T) {
@ -439,6 +502,17 @@ func TestCoordinatorLeaseDecodesProvisioningAttempts(t *testing.T) {
}
}
// stringSliceFromJSON converts a generically decoded JSON value into a
// []string. When value is not a []any the result is an empty, non-nil slice;
// elements that are not strings are skipped.
func stringSliceFromJSON(value any) []string {
	raw, isList := value.([]any)
	if !isList {
		return []string{}
	}
	result := make([]string, 0, len(raw))
	for i := range raw {
		text, isString := raw[i].(string)
		if !isString {
			continue
		}
		result = append(result, text)
	}
	return result
}
func TestCoordinatorFallbackSummary(t *testing.T) {
summary := coordinatorFallbackSummary(CoordinatorLease{
RequestedServerType: "c7a.48xlarge",