Compare commits
2 Commits
main
...
fix-stale-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3297ccc91 | ||
|
|
7c0f517d53 |
@ -6,6 +6,10 @@
|
||||
|
||||
- Added `provider: azure` for managed Azure Linux and native Windows SSH leases, including direct and brokered provisioning, shared Azure networking, SKU fallback, Azure docs, and cleanup support. Thanks @jwmoss.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed stale Code, WebVNC, and egress bridge clients so expired or missing leases stop polling/restarting after terminal coordinator responses. Thanks @vincentkoc.
|
||||
|
||||
## 0.7.0 - 2026-05-07
|
||||
|
||||
### Added
|
||||
|
||||
@ -24,6 +24,20 @@ type CoordinatorClient struct {
|
||||
Client *http.Client
|
||||
}
|
||||
|
||||
// CoordinatorHTTPError is a structured error describing a coordinator HTTP
// request that failed. Keeping the status code as a field lets callers
// classify failures with errors.As instead of parsing the error text.
type CoordinatorHTTPError struct {
	// Method is the HTTP method of the failed request.
	Method string
	// Path is the request path of the failed request.
	Path string
	// StatusCode is the HTTP status code carried by the error.
	StatusCode int
	// Message is optional detail text; it is omitted from Error when empty.
	Message string
}

// Error renders the failure as "coordinator METHOD PATH: http CODE" and
// appends ": MESSAGE" only when Message is non-empty.
func (e CoordinatorHTTPError) Error() string {
	summary := fmt.Sprintf("coordinator %s %s: http %d", e.Method, e.Path, e.StatusCode)
	if e.Message == "" {
		return summary
	}
	return summary + ": " + e.Message
}
|
||||
|
||||
type CoordinatorLease struct {
|
||||
ID string `json:"id"`
|
||||
Slug string `json:"slug,omitempty"`
|
||||
@ -1077,10 +1091,12 @@ func decodeCoordinatorResponse(method, path string, statusCode int, body io.Read
|
||||
if statusCode < 200 || statusCode >= 300 {
|
||||
data, _ := io.ReadAll(io.LimitReader(body, 600))
|
||||
msg := strings.TrimSpace(string(data))
|
||||
if msg != "" {
|
||||
return fmt.Errorf("coordinator %s %s: http %d: %s", method, path, statusCode, msg)
|
||||
return CoordinatorHTTPError{
|
||||
Method: method,
|
||||
Path: path,
|
||||
StatusCode: statusCode,
|
||||
Message: msg,
|
||||
}
|
||||
return fmt.Errorf("coordinator %s %s: http %d", method, path, statusCode)
|
||||
}
|
||||
if out != nil {
|
||||
if buf, ok := out.(*bytes.Buffer); ok {
|
||||
|
||||
@ -34,6 +34,7 @@ const (
|
||||
egressDialTimeout = 15 * time.Second
|
||||
egressRemoteReadyWait = 5 * time.Second
|
||||
egressDaemonRestartWait = 1 * time.Second
|
||||
egressDaemonFatalCode = 4
|
||||
)
|
||||
|
||||
type egressProxyMessage struct {
|
||||
@ -114,6 +115,9 @@ func (a App) egressHost(ctx context.Context, args []string) error {
|
||||
}
|
||||
bridge, err := connectEgressBridge(ctx, coord, leaseID, "host", *ticket, *sessionID, *profile, allow)
|
||||
if err != nil {
|
||||
if fatalEgressBridgeSetupError(err) {
|
||||
return exit(egressDaemonFatalCode, "egress lease unavailable: %v", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(a.Stdout, "egress host: connected lease=%s session=%s profile=%s allow=%s\n", leaseID, bridge.sessionID, blank(*profile, "-"), strings.Join(allow, ","))
|
||||
@ -145,6 +149,9 @@ func (a App) egressClient(ctx context.Context, args []string) error {
|
||||
}
|
||||
bridge, err := connectEgressBridge(ctx, coord, leaseID, "client", *ticket, *sessionID, "", nil)
|
||||
if err != nil {
|
||||
if fatalEgressBridgeSetupError(err) {
|
||||
return exit(egressDaemonFatalCode, "egress lease unavailable: %v", err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(a.Stdout, "egress client: connected lease=%s session=%s listen=%s\n", leaseID, bridge.sessionID, *listen)
|
||||
@ -368,6 +375,19 @@ func connectEgressBridge(ctx context.Context, coord *CoordinatorClient, leaseID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func fatalEgressBridgeSetupError(err error) bool {
|
||||
var httpErr CoordinatorHTTPError
|
||||
if !errors.As(err, &httpErr) {
|
||||
return false
|
||||
}
|
||||
switch httpErr.StatusCode {
|
||||
case http.StatusForbidden, http.StatusNotFound, http.StatusGone, http.StatusConflict:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func reusableEgressSessionID(ctx context.Context, coord *CoordinatorClient, leaseID, sessionID string) (string, error) {
|
||||
if strings.TrimSpace(sessionID) != "" {
|
||||
return strings.TrimSpace(sessionID), nil
|
||||
@ -954,6 +974,10 @@ func egressDaemonSupervisorScript(exe string, args []string) string {
|
||||
"while :; do\n" +
|
||||
" " + strings.Join(argv, " ") + "\n" +
|
||||
" code=$?\n" +
|
||||
" if [ \"$code\" = " + strconv.Itoa(egressDaemonFatalCode) + " ]; then\n" +
|
||||
" echo \"egress daemon supervisor: child exited fatal code=$code; stopping\"\n" +
|
||||
" exit \"$code\"\n" +
|
||||
" fi\n" +
|
||||
" echo \"egress daemon supervisor: child exited code=$code; restarting in 1s\"\n" +
|
||||
" sleep " + strconv.Itoa(int(egressDaemonRestartWait/time.Second)) + "\n" +
|
||||
"done\n"
|
||||
|
||||
@ -135,6 +135,32 @@ func TestManualEgressTicketCreationReusesActiveSession(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFatalEgressBridgeSetupError(t *testing.T) {
|
||||
fatalStatuses := []int{http.StatusForbidden, http.StatusNotFound, http.StatusGone, http.StatusConflict}
|
||||
for _, status := range fatalStatuses {
|
||||
err := CoordinatorHTTPError{StatusCode: status}
|
||||
if !fatalEgressBridgeSetupError(err) {
|
||||
t.Fatalf("status %d should stop stale egress bridge retries", status)
|
||||
}
|
||||
}
|
||||
if fatalEgressBridgeSetupError(CoordinatorHTTPError{StatusCode: http.StatusTooManyRequests}) {
|
||||
t.Fatal("transient coordinator errors should stay retryable")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEgressDaemonSupervisorStopsOnFatalExit(t *testing.T) {
|
||||
script := egressDaemonSupervisorScript("crabbox", []string{"egress", "host"})
|
||||
for _, want := range []string{
|
||||
`if [ "$code" = 4 ]; then`,
|
||||
`egress daemon supervisor: child exited fatal code=$code; stopping`,
|
||||
`exit "$code"`,
|
||||
} {
|
||||
if !strings.Contains(script, want) {
|
||||
t.Fatalf("supervisor script missing %q:\n%s", want, script)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEgressRequestHostPort(t *testing.T) {
|
||||
connect := &http.Request{Method: http.MethodConnect, Host: "discord.com:443"}
|
||||
host, port, err := egressRequestHostPort(connect)
|
||||
|
||||
@ -596,9 +596,18 @@ export function portalVNC(lease: LeaseRecord): Response {
|
||||
let statusTimer;
|
||||
let controllerLabel = "";
|
||||
let isController = false;
|
||||
const terminalStatusCodes = new Set([403, 404, 409, 410]);
|
||||
// Backoff for the next reconnect attempt: 500 doubled once per retryAttempt,
// capped at 5000. NOTE(review): presumably milliseconds for a timer; confirm
// against the caller that schedules the retry.
function retryDelay() {
  const backoff = 500 * Math.pow(2, retryAttempt);
  return Math.min(5000, backoff);
}
|
||||
async function responseMessage(response, fallback) {
|
||||
try {
|
||||
const body = await response.json();
|
||||
return body.message || body.error || fallback;
|
||||
} catch (_) {
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
function fallbackCopyText(text) {
|
||||
const ta = document.createElement("textarea");
|
||||
ta.value = text;
|
||||
@ -625,9 +634,16 @@ export function portalVNC(lease: LeaseRecord): Response {
|
||||
// Fetches the bridge status document from statusURL.
// Resolves to:
//   - the parsed JSON body when the response is OK;
//   - { terminal: true, message } when the status is in terminalStatusCodes,
//     meaning the caller should stop polling;
//   - { transient: true, message } for any other non-OK status, or when the
//     fetch or JSON parsing throws, meaning the caller may retry.
async function bridgeState() {
  try {
    const response = await fetch(statusURL, { cache: "no-store" });
    if (response.ok) {
      return await response.json();
    }
    const message = await responseMessage(response, "WebVNC bridge unavailable");
    if (terminalStatusCodes.has(response.status)) {
      return { terminal: true, message };
    }
    return { transient: true, message };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { transient: true, message };
  }
}
|
||||
function applyCollaborationState(state) {
|
||||
@ -662,6 +678,15 @@ export function portalVNC(lease: LeaseRecord): Response {
|
||||
applyCollaborationState(state);
|
||||
return state;
|
||||
}
|
||||
// Permanently halts the WebVNC client: marks it stopped and disconnected,
// cancels the retry timeout and the status interval, disconnects the RFB
// session if one exists, clears the screen container, and shows label as a
// "bad" status.
function stopPolling(label) {
  stopped = true;
  connected = false;
  window.clearTimeout(retryTimer);
  window.clearInterval(statusTimer);
  try {
    rfb?.disconnect();
  } catch (_) {
    // Best effort: a failing disconnect must not block the shutdown below.
  }
  screen.replaceChildren();
  setStatus(label, "bad");
}
|
||||
function scheduleRetry(label) {
|
||||
if (stopped) return;
|
||||
const delay = retryDelay();
|
||||
@ -676,6 +701,14 @@ export function portalVNC(lease: LeaseRecord): Response {
|
||||
screen.replaceChildren();
|
||||
try {
|
||||
const state = await bridgeState();
|
||||
if (state?.terminal) {
|
||||
stopPolling(state.message || "WebVNC bridge unavailable");
|
||||
return;
|
||||
}
|
||||
if (state?.transient) {
|
||||
scheduleRetry(state.message || "WebVNC status unavailable");
|
||||
return;
|
||||
}
|
||||
if (state && !state.bridgeConnected) {
|
||||
scheduleRetry(state.message || "WebVNC daemon not running; run the bridge command below");
|
||||
return;
|
||||
@ -925,15 +958,39 @@ export function portalCode(lease: LeaseRecord): Response {
|
||||
const hint = document.getElementById("code-hint");
|
||||
const statusURL = new URL(${JSON.stringify(statusPath)}, window.location.href);
|
||||
let pollTimer;
|
||||
let stopped = false;
|
||||
const terminalStatusCodes = new Set([403, 404, 409, 410]);
|
||||
// Updates the status element: value becomes its text and tone is stored in
// data-tone (empty string when omitted).
function setStatus(value, tone = "") {
  status.textContent = value;
  status.dataset.tone = tone;
}
|
||||
async function responseMessage(response, fallback) {
|
||||
try {
|
||||
const body = await response.json();
|
||||
return body.message || body.error || fallback;
|
||||
} catch (_) {
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
// Permanently halts Code bridge polling: sets the stopped flag, cancels the
// pending poll timeout, shows "bridge unavailable" as a "bad" status, and
// puts message (or a default explanation when it is empty) in the hint.
function stopPolling(message) {
  stopped = true;
  window.clearTimeout(pollTimer);
  setStatus("bridge unavailable", "bad");
  hint.textContent = message || "This lease is no longer available. Open a current lease from the portal.";
}
|
||||
async function pollBridge() {
|
||||
if (stopped) return;
|
||||
window.clearTimeout(pollTimer);
|
||||
try {
|
||||
const response = await fetch(statusURL, { cache: "no-store" });
|
||||
if (!response.ok) throw new Error("status " + response.status);
|
||||
if (!response.ok) {
|
||||
const message = await responseMessage(response, "Code bridge status unavailable");
|
||||
if (terminalStatusCodes.has(response.status)) {
|
||||
stopPolling(message);
|
||||
return;
|
||||
}
|
||||
throw new Error(message);
|
||||
}
|
||||
const state = await response.json();
|
||||
if (state?.code?.agentConnected) {
|
||||
setStatus("bridge connected; opening", "ok");
|
||||
@ -947,7 +1004,9 @@ export function portalCode(lease: LeaseRecord): Response {
|
||||
setStatus("status unavailable", "bad");
|
||||
hint.textContent = "Could not read bridge status. Reload or use the command below.";
|
||||
}
|
||||
pollTimer = window.setTimeout(pollBridge, 2000);
|
||||
if (!stopped) {
|
||||
pollTimer = window.setTimeout(pollBridge, 2000);
|
||||
}
|
||||
}
|
||||
document.getElementById("code-reload")?.addEventListener("click", () => {
|
||||
window.location.reload();
|
||||
|
||||
@ -1390,6 +1390,8 @@ describe("fleet lease identity and idle", () => {
|
||||
expect(pageBody).toContain('id="code-copy"');
|
||||
expect(pageBody).toContain("/portal/leases/cbx_000000000001/code/health");
|
||||
expect(pageBody).toContain("window.location.reload()");
|
||||
expect(pageBody).toContain("terminalStatusCodes");
|
||||
expect(pageBody).toContain("stopPolling(message)");
|
||||
|
||||
const health = await fleet.fetch(
|
||||
request("GET", "/portal/leases/blue-lobster/code/health", { headers }),
|
||||
@ -1701,6 +1703,8 @@ describe("fleet lease identity and idle", () => {
|
||||
expect(pageBody).not.toContain("vnc-role");
|
||||
expect(pageBody).not.toContain("status-pill vnc-role");
|
||||
expect(pageBody).toContain("rfb.viewOnly = !controlling");
|
||||
expect(pageBody).toContain("state?.terminal");
|
||||
expect(pageBody).toContain("stopPolling(state.message");
|
||||
expect(pageBody).toContain('fragment.get("username")');
|
||||
expect(pageBody).toContain('types.includes("username")');
|
||||
expect(pageBody).not.toContain("cdn.jsdelivr.net");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user