fix: stop aggressive retry on auth failures, add setup nudge (#206)
Some checks are pending
Build and Test / test (push) Waiting to run
Build and Test / build (win-arm64) (push) Blocked by required conditions
Build and Test / build (win-x64) (push) Blocked by required conditions
Build and Test / build-extension (arm64) (push) Blocked by required conditions
Build and Test / build-extension (x64) (push) Blocked by required conditions
Build and Test / release (push) Blocked by required conditions
Build and Test / build-msix (ARM64, win-arm64) (push) Blocked by required conditions
Build and Test / build-msix (x64, win-x64) (push) Blocked by required conditions
Some checks are pending
Build and Test / test (push) Waiting to run
Build and Test / build (win-arm64) (push) Blocked by required conditions
Build and Test / build (win-x64) (push) Blocked by required conditions
Build and Test / build-extension (arm64) (push) Blocked by required conditions
Build and Test / build-extension (x64) (push) Blocked by required conditions
Build and Test / release (push) Blocked by required conditions
Build and Test / build-msix (ARM64, win-arm64) (push) Blocked by required conditions
Build and Test / build-msix (x64, win-x64) (push) Blocked by required conditions
Part 1 (fixes #198): Move reconnect attempt counter reset from TCP connect to application-level hello-ok handshake. Auth failures now properly progress through backoff (1s→2s→4s→8s→15s→30s→60s) instead of resetting to 1s on every attempt. Part 2 (closes #199): Detect terminal auth errors (token mismatch, origin not allowed, rate limited, signature exhausted) and stop retrying entirely. Fire AuthenticationFailed event so the UI shows a setup nudge in the tray menu. Security: exhausted device signature modes (all 4 rejected) are now treated as terminal — no infinite cycling. Error messages stay local (log file + tray menu only, never sent externally). Pairing success already sends a toast notification (Toast_NodePaired) in all 5 locales.
This commit is contained in:
parent
5a97268ec0
commit
12eb2a4d7d
@ -58,6 +58,7 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
private bool _nodeListUnsupported;
|
||||
private bool _operatorReadScopeUnavailable;
|
||||
private bool _pairingRequiredAwaitingApproval;
|
||||
private bool _authFailed;
|
||||
private IReadOnlyList<UserNotificationRule>? _userRules;
|
||||
private bool _preferStructuredCategories = true;
|
||||
|
||||
@ -103,7 +104,7 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
|
||||
/// <summary>
/// Reports whether an automatic reconnect is currently allowed for this client.
/// </summary>
/// <returns>
/// <c>false</c> while pairing approval is still pending or after a terminal
/// authentication failure has been recorded; <c>true</c> otherwise.
/// </returns>
protected override bool ShouldAutoReconnect()
{
    // Only one return may remain here: an earlier
    // `return !_pairingRequiredAwaitingApproval;` ahead of this line would make
    // the _authFailed check unreachable and keep retrying after auth failures.
    return !_pairingRequiredAwaitingApproval && !_authFailed;
}
|
||||
|
||||
protected override void OnDisconnected()
|
||||
@ -665,6 +666,8 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
if (payload.TryGetProperty("type", out var t) && t.GetString() == "hello-ok")
|
||||
{
|
||||
_pairingRequiredAwaitingApproval = false;
|
||||
_authFailed = false;
|
||||
ResetReconnectAttempts();
|
||||
_operatorDeviceId = TryGetHandshakeDeviceId(payload);
|
||||
_grantedOperatorScopes = TryGetHandshakeScopes(payload);
|
||||
_mainSessionKey = TryGetHandshakeMainSessionKey(payload) ?? "main";
|
||||
@ -792,6 +795,9 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
}
|
||||
|
||||
_logger.Warn("Gateway rejected device signature in all supported payload modes");
|
||||
_authFailed = true;
|
||||
RaiseAuthenticationFailed("device signature rejected in all modes — the gateway may require a different auth protocol version");
|
||||
RaiseStatusChanged(ConnectionStatus.Error);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -804,6 +810,15 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
return;
|
||||
}
|
||||
|
||||
// Permanent auth failures — stop retrying and notify the app
|
||||
if (method == "connect" && IsTerminalAuthError(message))
|
||||
{
|
||||
_authFailed = true;
|
||||
RaiseAuthenticationFailed(message);
|
||||
RaiseStatusChanged(ConnectionStatus.Error);
|
||||
return;
|
||||
}
|
||||
|
||||
if (IsMissingScopeError(message, "operator.read") &&
|
||||
method is "sessions.list" or "usage.status" or "usage.cost" or "node.list")
|
||||
{
|
||||
@ -904,6 +919,13 @@ public class OpenClawGatewayClient : WebSocketClientBase
|
||||
return errorMessage.Contains("unknown method", StringComparison.OrdinalIgnoreCase);
|
||||
}
|
||||
|
||||
/// <summary>
/// Tests whether a gateway error message matches one of the known permanent
/// authentication failures, for which retrying the connection is pointless.
/// </summary>
/// <param name="errorMessage">Error text reported by the gateway; may be null or blank.</param>
/// <returns>
/// <c>true</c> when the message contains "token mismatch", "origin not allowed"
/// or "too many failed" (case-insensitive); <c>false</c> otherwise, including
/// for null, empty or whitespace-only input.
/// </returns>
private static bool IsTerminalAuthError(string errorMessage)
{
    // Same guard as IsMissingScopeError: a missing message is never a match
    // and must not throw.
    if (string.IsNullOrWhiteSpace(errorMessage))
    {
        return false;
    }

    return errorMessage.Contains("token mismatch", StringComparison.OrdinalIgnoreCase) ||
           errorMessage.Contains("origin not allowed", StringComparison.OrdinalIgnoreCase) ||
           errorMessage.Contains("too many failed", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
private static bool IsMissingScopeError(string errorMessage, string scope)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(errorMessage) || string.IsNullOrWhiteSpace(scope))
|
||||
|
||||
@ -40,6 +40,17 @@ public abstract class WebSocketClientBase : IDisposable
|
||||
|
||||
// Events
|
||||
public event EventHandler<ConnectionStatus>? StatusChanged;
|
||||
public event EventHandler<string>? AuthenticationFailed;
|
||||
|
||||
/// <summary>
/// Sets the reconnect attempt counter back to zero.
/// Intended to be called once the application-level handshake has succeeded.
/// </summary>
protected void ResetReconnectAttempts()
{
    _reconnectAttempts = 0;
}
|
||||
|
||||
/// <summary>
/// Logs an authentication failure as a warning and raises
/// <see cref="AuthenticationFailed"/> with the supplied message.
/// This method does not itself change any reconnect state.
/// </summary>
/// <param name="message">Failure description passed to the log and to event subscribers.</param>
protected void RaiseAuthenticationFailed(string message)
{
    _logger.Warn($"{ClientRole} authentication failed: {message}");

    // Nothing to notify when no subscriber is attached.
    var handler = AuthenticationFailed;
    if (handler != null)
    {
        handler(this, message);
    }
}
|
||||
|
||||
// --- Abstract members (subclass MUST implement) ---
|
||||
|
||||
@ -123,7 +134,9 @@ public abstract class WebSocketClientBase : IDisposable
|
||||
|
||||
await _webSocket.ConnectAsync(uri, _cts.Token);
|
||||
|
||||
_reconnectAttempts = 0;
|
||||
// Don't reset _reconnectAttempts here — TCP connect succeeding doesn't mean
|
||||
// auth will succeed. Reset only after the full application-level handshake
|
||||
// completes (subclass calls ResetReconnectAttempts after hello-ok).
|
||||
_logger.Info($"{ClientRole} connected, waiting for challenge...");
|
||||
|
||||
await OnConnectedAsync();
|
||||
|
||||
@ -530,6 +530,7 @@ public class WindowsNodeClient : WebSocketClientBase
|
||||
{
|
||||
var reconnectingAfterApproval = _pairingApprovedAwaitingReconnect;
|
||||
_isConnected = true;
|
||||
ResetReconnectAttempts();
|
||||
|
||||
// Extract node ID if returned
|
||||
if (payload.TryGetProperty("nodeId", out var nodeIdProp))
|
||||
|
||||
@ -87,6 +87,7 @@ public partial class App : Application
|
||||
private ActivityStreamWindow? _activityStreamWindow;
|
||||
private TrayMenuWindow? _trayMenuWindow;
|
||||
private QuickSendDialog? _quickSendDialog;
|
||||
private string? _authFailureMessage;
|
||||
|
||||
// Node service (optional, enabled in settings)
|
||||
private NodeService? _nodeService;
|
||||
@ -753,6 +754,12 @@ public partial class App : Application
|
||||
var statusIcon = MenuDisplayHelper.GetStatusIcon(_currentStatus);
|
||||
menu.AddMenuItem(string.Format(LocalizationHelper.GetString("Menu_StatusFormat"), LocalizationHelper.GetConnectionStatusText(_currentStatus)), statusIcon, "status");
|
||||
|
||||
// Auth failure nudge
|
||||
if (!string.IsNullOrEmpty(_authFailureMessage))
|
||||
{
|
||||
menu.AddMenuItem("⚠️ Auth failed — Run Setup", "🔧", "setup");
|
||||
}
|
||||
|
||||
// Activity (if any)
|
||||
if (_currentActivity != null && _currentActivity.Kind != OpenClaw.Shared.ActivityKind.Idle)
|
||||
{
|
||||
@ -1108,6 +1115,7 @@ public partial class App : Application
|
||||
_gatewayClient.SetUserRules(_settings.UserRules.Count > 0 ? _settings.UserRules : null);
|
||||
_gatewayClient.SetPreferStructuredCategories(_settings.PreferStructuredCategories);
|
||||
_gatewayClient.StatusChanged += OnConnectionStatusChanged;
|
||||
_gatewayClient.AuthenticationFailed += OnAuthenticationFailed;
|
||||
_gatewayClient.ActivityChanged += OnActivityChanged;
|
||||
_gatewayClient.NotificationReceived += OnNotificationReceived;
|
||||
_gatewayClient.ChannelHealthUpdated += OnChannelHealthUpdated;
|
||||
@ -1126,6 +1134,7 @@ public partial class App : Application
|
||||
if (_gatewayClient != null)
|
||||
{
|
||||
_gatewayClient.StatusChanged -= OnConnectionStatusChanged;
|
||||
_gatewayClient.AuthenticationFailed -= OnAuthenticationFailed;
|
||||
_gatewayClient.ActivityChanged -= OnActivityChanged;
|
||||
_gatewayClient.NotificationReceived -= OnNotificationReceived;
|
||||
_gatewayClient.ChannelHealthUpdated -= OnChannelHealthUpdated;
|
||||
@ -1245,6 +1254,8 @@ public partial class App : Application
|
||||
private void OnConnectionStatusChanged(object? sender, ConnectionStatus status)
|
||||
{
|
||||
_currentStatus = status;
|
||||
if (status == ConnectionStatus.Connected)
|
||||
_authFailureMessage = null;
|
||||
UpdateTrayIcon();
|
||||
|
||||
if (status == ConnectionStatus.Connected)
|
||||
@ -1253,6 +1264,14 @@ public partial class App : Application
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Handles the gateway client's AuthenticationFailed event: stores the failure
/// message, logs it as an error, adds it to recent activity and refreshes the tray icon.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="message">Failure description supplied by the client.</param>
private void OnAuthenticationFailed(object? sender, string message)
{
    // Store the message before anything else runs, matching the original order.
    _authFailureMessage = message;

    var logLine = $"Authentication failed: {message}";
    Logger.Error(logLine);

    var activityLine = $"Auth failed: {message}";
    AddRecentActivity(activityLine, category: "error");

    UpdateTrayIcon();
}
|
||||
|
||||
private void OnActivityChanged(object? sender, AgentActivity? activity)
|
||||
{
|
||||
if (activity == null)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user