From ecdbe91adffe0c2ca04d15054f8051178ba9966c Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 4 May 2026 23:09:41 +0100 Subject: [PATCH] fix: wait for delayed blacksmith cleanup --- CHANGELOG.md | 2 +- internal/cli/blacksmith.go | 22 +++++++++--- internal/cli/blacksmith_test.go | 62 +++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29ad9c2..9fd0b39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ - Fixed `crabbox run --junit` so all-passing JUnit files record results instead of leaving the coordinator run stuck when the failure list is empty. - Fixed `crabbox desktop launch --browser` on freshly warmed desktop leases by creating the remote workdir before launching the app. -- Fixed failed Blacksmith Testbox warmups so printed or newly listed `tbx_...` boxes are stopped instead of being left queued after an upstream workflow error. +- Fixed failed Blacksmith Testbox warmups so printed, newly listed, or delayed `tbx_...` boxes are stopped instead of being left queued after an upstream workflow error. - Fixed Worker deploy smoke to prefer the Crabbox-scoped Cloudflare token when it is present in the environment or local profile. - Fixed brokered Tailscale requests on coordinators without OAuth secrets so they fail as disabled instead of entering the auth-key minting path. - Fixed native Windows `--shell` runs so multi-statement PowerShell scripts keep their quotes instead of being re-parsed by a nested PowerShell process. diff --git a/internal/cli/blacksmith.go b/internal/cli/blacksmith.go index 6a18e9e..9c4fcbc 100644 --- a/internal/cli/blacksmith.go +++ b/internal/cli/blacksmith.go @@ -16,9 +16,11 @@ import ( const blacksmithTestboxProvider = "blacksmith-testbox" var ( - blacksmithCommandContext = exec.CommandContext - blacksmithIDPattern = regexp.MustCompile(`\btbx_[A-Za-z0-9_-]+\b`) - blacksmithCleanupDelay = time.Second + blacksmithCommandContext = exec.CommandContext + blacksmithIDPattern = regexp.MustCompile(`\btbx_[A-Za-z0-9_-]+\b`) + blacksmithCleanupAttempts = 36 + blacksmithCleanupDelay = 5 * time.Second + blacksmithCleanupQuiet = 12 ) type blacksmithRunOptions struct { @@ -350,7 +352,9 @@ func (a App) cleanupFailedBlacksmithWarmup(ctx context.Context, cfg Config, befo before[leaseID] = true } } - for attempt := 0; attempt < 5; attempt++ { + stoppedAny := false + quietAttempts := 0 + for attempt := 0; attempt < blacksmithCleanupAttempts; attempt++ { if attempt > 0 { select { case <-ctx.Done(): @@ -372,7 +376,15 @@ func (a App) cleanupFailedBlacksmithWarmup(ctx context.Context, cfg Config, befo stopped = true } if stopped { - return + stoppedAny = true + quietAttempts = 0 + continue + } + if stoppedAny { + quietAttempts++ + if quietAttempts >= blacksmithCleanupQuiet { + return + } } } } diff --git a/internal/cli/blacksmith_test.go b/internal/cli/blacksmith_test.go index 8f8214a..0fa5790 100644 --- a/internal/cli/blacksmith_test.go +++ b/internal/cli/blacksmith_test.go @@ -133,7 +133,11 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) original := blacksmithCommandContext originalDelay := blacksmithCleanupDelay + originalAttempts := blacksmithCleanupAttempts + originalQuiet := blacksmithCleanupQuiet blacksmithCleanupDelay = time.Millisecond + blacksmithCleanupAttempts = 3 + blacksmithCleanupQuiet = 1 var stopped string listCalls := 0 blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { @@ -157,6 +161,8 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { t.Cleanup(func() { blacksmithCommandContext = original blacksmithCleanupDelay = originalDelay + blacksmithCleanupAttempts = originalAttempts + blacksmithCleanupQuiet = originalQuiet }) cfg := baseConfig() @@ -173,6 +179,62 @@ func TestBlacksmithWarmupFailureStopsNewListedTestbox(t *testing.T) { } } +func TestBlacksmithWarmupFailureContinuesAfterFirstDelayedStop(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, ".config")) + original := blacksmithCommandContext + originalDelay := blacksmithCleanupDelay + originalAttempts := blacksmithCleanupAttempts + originalQuiet := blacksmithCleanupQuiet + blacksmithCleanupDelay = time.Millisecond + blacksmithCleanupAttempts = 5 + blacksmithCleanupQuiet = 1 + stopped := []string{} + listCalls := 0 + blacksmithCommandContext = func(_ context.Context, _ string, args ...string) *exec.Cmd { + if len(args) >= 3 && args[0] == "testbox" && args[1] == "list" { + listCalls++ + switch listCalls { + case 2: + return exec.Command("sh", "-c", "printf 'tbx_delayed1 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:47.000000Z\\n'") + case 3: + return exec.Command("sh", "-c", "printf 'tbx_delayed2 queued openclaw .github/workflows/testbox.yml check main 2026-05-04T21:23:48.000000Z\\n'") + default: + return exec.Command("sh", "-c", "printf 'ID STATUS REPO WORKFLOW JOB REF CREATED\\n'") + } + } + if len(args) >= 3 && args[0] == "testbox" && args[1] == "stop" { + for i, arg := range args { + if arg == "--id" && i+1 < len(args) { + stopped = append(stopped, args[i+1]) + } + } + return exec.Command("sh", "-c", "exit 0") + } + return exec.Command("sh", "-c", "printf 'workflow missing\\n'; exit 1") + } + t.Cleanup(func() { + blacksmithCommandContext = original + blacksmithCleanupDelay = originalDelay + blacksmithCleanupAttempts = originalAttempts + blacksmithCleanupQuiet = originalQuiet + }) + + cfg := baseConfig() + cfg.Blacksmith.Workflow = ".github/workflows/testbox.yml" + cfg.Blacksmith.Job = "check" + cfg.Blacksmith.Ref = "main" + app := App{Stdout: io.Discard, Stderr: io.Discard} + _, _, err := app.blacksmithWarmupLease(context.Background(), cfg, Repo{Root: "/repo"}, false) + if err == nil { + t.Fatal("expected warmup failure") + } + if !reflect.DeepEqual(stopped, []string{"tbx_delayed1", "tbx_delayed2"}) { + t.Fatalf("stopped=%v, want both delayed testboxes", stopped) + } +} + func TestBlacksmithOneShotRunRemovesClaimAfterStop(t *testing.T) { home := t.TempDir() t.Setenv("HOME", home)