fix: stop aggressive retry on auth failures, add setup nudge (#206)
Some checks are pending
Build and Test / test (push) Waiting to run
Build and Test / build (win-arm64) (push) Blocked by required conditions
Build and Test / build (win-x64) (push) Blocked by required conditions
Build and Test / build-extension (arm64) (push) Blocked by required conditions
Build and Test / build-extension (x64) (push) Blocked by required conditions
Build and Test / release (push) Blocked by required conditions
Build and Test / build-msix (ARM64, win-arm64) (push) Blocked by required conditions
Build and Test / build-msix (x64, win-x64) (push) Blocked by required conditions

Part 1 (fixes #198): Move reconnect attempt counter reset from TCP
connect to application-level hello-ok handshake. Auth failures now
properly progress through backoff (1s→2s→4s→8s→15s→30s→60s) instead
of resetting to 1s on every attempt.

Part 2 (closes #199): Detect terminal auth errors (token mismatch,
origin not allowed, rate limited, signature exhausted) and stop
retrying entirely. Fire AuthenticationFailed event so the UI shows
a setup nudge in the tray menu.

Security: exhausted device signature modes (all 4 rejected) are now
treated as terminal — no infinite cycling. Error messages stay local
(log file + tray menu only, never sent externally).

Pairing success already sends a toast notification (Toast_NodePaired)
in all 5 locales.
This commit is contained in:
Scott Hanselman 2026-04-23 12:10:32 -07:00 committed by GitHub
parent 5a97268ec0
commit 12eb2a4d7d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 57 additions and 2 deletions

View File

@ -58,6 +58,7 @@ public class OpenClawGatewayClient : WebSocketClientBase
private bool _nodeListUnsupported;
private bool _operatorReadScopeUnavailable;
private bool _pairingRequiredAwaitingApproval;
private bool _authFailed;
private IReadOnlyList<UserNotificationRule>? _userRules;
private bool _preferStructuredCategories = true;
@ -103,7 +104,7 @@ public class OpenClawGatewayClient : WebSocketClientBase
/// <summary>
/// Reports whether automatic reconnection should be attempted.
/// </summary>
/// <returns>
/// <c>false</c> while pairing approval is still pending or after a terminal
/// authentication failure has been recorded; otherwise <c>true</c>.
/// </returns>
protected override bool ShouldAutoReconnect()
{
    // Both flags are cleared again when the gateway answers with hello-ok,
    // so a later successful handshake re-enables automatic reconnects.
    return !_pairingRequiredAwaitingApproval && !_authFailed;
}
protected override void OnDisconnected()
@ -665,6 +666,8 @@ public class OpenClawGatewayClient : WebSocketClientBase
if (payload.TryGetProperty("type", out var t) && t.GetString() == "hello-ok")
{
_pairingRequiredAwaitingApproval = false;
_authFailed = false;
ResetReconnectAttempts();
_operatorDeviceId = TryGetHandshakeDeviceId(payload);
_grantedOperatorScopes = TryGetHandshakeScopes(payload);
_mainSessionKey = TryGetHandshakeMainSessionKey(payload) ?? "main";
@ -792,6 +795,9 @@ public class OpenClawGatewayClient : WebSocketClientBase
}
_logger.Warn("Gateway rejected device signature in all supported payload modes");
_authFailed = true;
RaiseAuthenticationFailed("device signature rejected in all modes — the gateway may require a different auth protocol version");
RaiseStatusChanged(ConnectionStatus.Error);
return;
}
@ -804,6 +810,15 @@ public class OpenClawGatewayClient : WebSocketClientBase
return;
}
// Permanent auth failures — stop retrying and notify the app
if (method == "connect" && IsTerminalAuthError(message))
{
_authFailed = true;
RaiseAuthenticationFailed(message);
RaiseStatusChanged(ConnectionStatus.Error);
return;
}
if (IsMissingScopeError(message, "operator.read") &&
method is "sessions.list" or "usage.status" or "usage.cost" or "node.list")
{
@ -904,6 +919,13 @@ public class OpenClawGatewayClient : WebSocketClientBase
return errorMessage.Contains("unknown method", StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Checks whether a gateway error message matches one of the known
/// authentication failures that the connect error handler treats as permanent.
/// </summary>
/// <param name="errorMessage">Error text reported by the gateway.</param>
/// <returns>
/// <c>true</c> when the message contains "token mismatch", "origin not allowed"
/// or "too many failed" (case-insensitive); <c>false</c> otherwise, including
/// when the message is null, empty or whitespace.
/// </returns>
private static bool IsTerminalAuthError(string errorMessage)
{
    // Same guard as IsMissingScopeError: a missing message can never match,
    // and must not throw while an error response is being handled.
    if (string.IsNullOrWhiteSpace(errorMessage))
    {
        return false;
    }

    return errorMessage.Contains("token mismatch", StringComparison.OrdinalIgnoreCase) ||
           errorMessage.Contains("origin not allowed", StringComparison.OrdinalIgnoreCase) ||
           errorMessage.Contains("too many failed", StringComparison.OrdinalIgnoreCase);
}
private static bool IsMissingScopeError(string errorMessage, string scope)
{
if (string.IsNullOrWhiteSpace(errorMessage) || string.IsNullOrWhiteSpace(scope))

View File

@ -40,6 +40,17 @@ public abstract class WebSocketClientBase : IDisposable
// Events
public event EventHandler<ConnectionStatus>? StatusChanged;
public event EventHandler<string>? AuthenticationFailed;
/// <summary>
/// Sets the reconnect attempt counter back to zero.
/// Subclasses call this once their application-level handshake has succeeded.
/// </summary>
protected void ResetReconnectAttempts()
{
    _reconnectAttempts = 0;
}
/// <summary>
/// Logs an authentication failure as a warning and notifies
/// <see cref="AuthenticationFailed"/> subscribers.
/// </summary>
/// <remarks>
/// This method only logs and raises the event; it does not change any
/// reconnect state itself.
/// </remarks>
/// <param name="message">Failure description passed to the log and to subscribers.</param>
protected void RaiseAuthenticationFailed(string message)
{
    _logger.Warn($"{ClientRole} authentication failed: {message}");

    // Read the delegate once so the null check and the call see the same value.
    var handler = AuthenticationFailed;
    if (handler != null)
    {
        handler(this, message);
    }
}
// --- Abstract members (subclass MUST implement) ---
@ -123,7 +134,9 @@ public abstract class WebSocketClientBase : IDisposable
await _webSocket.ConnectAsync(uri, _cts.Token);
_reconnectAttempts = 0;
// Don't reset _reconnectAttempts here — TCP connect succeeding doesn't mean
// auth will succeed. Reset only after the full application-level handshake
// completes (subclass calls ResetReconnectAttempts after hello-ok).
_logger.Info($"{ClientRole} connected, waiting for challenge...");
await OnConnectedAsync();

View File

@ -530,6 +530,7 @@ public class WindowsNodeClient : WebSocketClientBase
{
var reconnectingAfterApproval = _pairingApprovedAwaitingReconnect;
_isConnected = true;
ResetReconnectAttempts();
// Extract node ID if returned
if (payload.TryGetProperty("nodeId", out var nodeIdProp))

View File

@ -87,6 +87,7 @@ public partial class App : Application
private ActivityStreamWindow? _activityStreamWindow;
private TrayMenuWindow? _trayMenuWindow;
private QuickSendDialog? _quickSendDialog;
private string? _authFailureMessage;
// Node service (optional, enabled in settings)
private NodeService? _nodeService;
@ -753,6 +754,12 @@ public partial class App : Application
var statusIcon = MenuDisplayHelper.GetStatusIcon(_currentStatus);
menu.AddMenuItem(string.Format(LocalizationHelper.GetString("Menu_StatusFormat"), LocalizationHelper.GetConnectionStatusText(_currentStatus)), statusIcon, "status");
// Auth failure nudge
if (!string.IsNullOrEmpty(_authFailureMessage))
{
menu.AddMenuItem("⚠️ Auth failed — Run Setup", "🔧", "setup");
}
// Activity (if any)
if (_currentActivity != null && _currentActivity.Kind != OpenClaw.Shared.ActivityKind.Idle)
{
@ -1108,6 +1115,7 @@ public partial class App : Application
_gatewayClient.SetUserRules(_settings.UserRules.Count > 0 ? _settings.UserRules : null);
_gatewayClient.SetPreferStructuredCategories(_settings.PreferStructuredCategories);
_gatewayClient.StatusChanged += OnConnectionStatusChanged;
_gatewayClient.AuthenticationFailed += OnAuthenticationFailed;
_gatewayClient.ActivityChanged += OnActivityChanged;
_gatewayClient.NotificationReceived += OnNotificationReceived;
_gatewayClient.ChannelHealthUpdated += OnChannelHealthUpdated;
@ -1126,6 +1134,7 @@ public partial class App : Application
if (_gatewayClient != null)
{
_gatewayClient.StatusChanged -= OnConnectionStatusChanged;
_gatewayClient.AuthenticationFailed -= OnAuthenticationFailed;
_gatewayClient.ActivityChanged -= OnActivityChanged;
_gatewayClient.NotificationReceived -= OnNotificationReceived;
_gatewayClient.ChannelHealthUpdated -= OnChannelHealthUpdated;
@ -1245,6 +1254,8 @@ public partial class App : Application
private void OnConnectionStatusChanged(object? sender, ConnectionStatus status)
{
_currentStatus = status;
if (status == ConnectionStatus.Connected)
_authFailureMessage = null;
UpdateTrayIcon();
if (status == ConnectionStatus.Connected)
@ -1253,6 +1264,14 @@ public partial class App : Application
}
}
/// <summary>
/// Handles the gateway client's AuthenticationFailed event: stores the failure
/// message, logs it, adds it to recent activity and refreshes the tray icon.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="message">Failure description supplied by the client.</param>
private void OnAuthenticationFailed(object? sender, string message)
{
    // A non-empty stored message is what makes the tray menu show the
    // "Run Setup" nudge; it is cleared again once the status becomes Connected.
    _authFailureMessage = message;

    string logLine = $"Authentication failed: {message}";
    Logger.Error(logLine);

    string activityText = $"Auth failed: {message}";
    AddRecentActivity(activityText, category: "error");

    UpdateTrayIcon();
}
private void OnActivityChanged(object? sender, AgentActivity? activity)
{
if (activity == null)