fix: wait for delayed blacksmith cleanup

This commit is contained in:
Peter Steinberger 2026-05-04 23:09:41 +01:00
parent fb91f465df
commit ecdbe91adf
No known key found for this signature in database
3 changed files with 80 additions and 6 deletions

View File

@ -26,7 +26,7 @@
- Fixed `crabbox run --junit` so all-passing JUnit files record results instead of leaving the coordinator run stuck when the failure list is empty.
- Fixed `crabbox desktop launch --browser` on freshly warmed desktop leases by creating the remote workdir before launching the app.
- Fixed failed Blacksmith Testbox warmups so printed or newly listed `tbx_...` boxes are stopped instead of being left queued after an upstream workflow error.
- Fixed failed Blacksmith Testbox warmups so printed, newly listed, or delayed `tbx_...` boxes are stopped instead of being left queued after an upstream workflow error.
- Fixed Worker deploy smoke to prefer the Crabbox-scoped Cloudflare token when it is present in the environment or local profile.
- Fixed brokered Tailscale requests on coordinators without OAuth secrets so they fail as disabled instead of entering the auth-key minting path.
- Fixed native Windows `--shell` runs so multi-statement PowerShell scripts keep their quotes instead of being re-parsed by a nested PowerShell process.

View File

@ -16,9 +16,11 @@ import (
const blacksmithTestboxProvider = "blacksmith-testbox"
var (
blacksmithCommandContext = exec.CommandContext
blacksmithIDPattern = regexp.MustCompile(`\btbx_[A-Za-z0-9_-]+\b`)
blacksmithCleanupDelay = time.Second
blacksmithCommandContext = exec.CommandContext
blacksmithIDPattern = regexp.MustCompile(`\btbx_[A-Za-z0-9_-]+\b`)
blacksmithCleanupAttempts = 36
blacksmithCleanupDelay = 5 * time.Second
blacksmithCleanupQuiet = 12
)
type blacksmithRunOptions struct {
@ -350,7 +352,9 @@ func (a App) cleanupFailedBlacksmithWarmup(ctx context.Context, cfg Config, befo
before[leaseID] = true
}
}
for attempt := 0; attempt < 5; attempt++ {
stoppedAny := false
quietAttempts := 0
for attempt := 0; attempt < blacksmithCleanupAttempts; attempt++ {
if attempt > 0 {
select {
case <-ctx.Done():
@ -372,7 +376,15 @@ func (a App) cleanupFailedBlacksmithWarmup(ctx context.Context, cfg Config, befo
stopped = true
}
if stopped {
return
stoppedAny = true
quietAttempts = 0
continue
}
if stoppedAny {
quietAttempts++
if quietAttempts >= blacksmithCleanupQuiet {
return
}
}
}
}

View File

@ -133,7 +133,11 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) {
t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config"))
original := blacksmithCommandContext
originalDelay := blacksmithCleanupDelay
originalAttempts := blacksmithCleanupAttempts
originalQuiet := blacksmithCleanupQuiet
blacksmithCleanupDelay = time.Millisecond
blacksmithCleanupAttempts = 3
blacksmithCleanupQuiet = 1
var stopped string
listCalls := 0
blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd {
@ -157,6 +161,8 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) {
t.Cleanup(func() {
blacksmithCommandContext = original
blacksmithCleanupDelay = originalDelay
blacksmithCleanupAttempts = originalAttempts
blacksmithCleanupQuiet = originalQuiet
})
cfg := baseConfig()
@ -173,6 +179,62 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) {
}
}
func TestBlacksmithWarmupFailureContinuesAfterFirstDelayedStop(t *testing.T) {
home := t.TempDir()
t.Setenv("HOME", home)
t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config"))
original := blacksmithCommandContext
originalDelay := blacksmithCleanupDelay
originalAttempts := blacksmithCleanupAttempts
originalQuiet := blacksmithCleanupQuiet
blacksmithCleanupDelay = time.Millisecond
blacksmithCleanupAttempts = 5
blacksmithCleanupQuiet = 1
stopped := []string{}
listCalls := 0
blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd {
if len(args) >= 3 && args[0] == "testbox" && args[1] == "list" {
listCalls++
switch listCalls {
case 2:
return exec.Command("sh", "-c", "printf 'tbx_delayed1 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:47.000000Z\\n'")
case 3:
return exec.Command("sh", "-c", "printf 'tbx_delayed2 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:48.000000Z\\n'")
default:
return exec.Command("sh", "-c", "printf 'ID STATUS REPO WORKFLOW JOB REF CREATED\\n'")
}
}
if len(args) >= 3 && args[0] == "testbox" && args[1] == "stop" {
for i, arg := range args {
if arg == "--id" && i+1 < len(args) {
stopped = append(stopped, args[i+1])
}
}
return exec.Command("sh", "-c", "exit 0")
}
return exec.Command("sh", "-c", "printf 'workflow missing\\n'; exit 1")
}
t.Cleanup(func() {
blacksmithCommandContext = original
blacksmithCleanupDelay = originalDelay
blacksmithCleanupAttempts = originalAttempts
blacksmithCleanupQuiet = originalQuiet
})
cfg := baseConfig()
cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml"
cfg.Blacksmith.Job = "check"
cfg.Blacksmith.Ref = "main"
app := App{Stdout: io.Discard, Stderr: io.Discard}
_, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false)
if err == nil {
t.Fatal("expected warmup failure")
}
if !reflect.DeepEqual(stopped, []string{"tbx_delayed1", "tbx_delayed2"}) {
t.Fatalf("stopped=%v, want both delayed testboxes", stopped)
}
}
func TestBlacksmithOneShotRunRemovesClaimAfterStop(t *testing.T) {
home := t.TempDir()
t.Setenv("HOME", home)