fix: retry transient auth pairing drops

This commit is contained in:
Peter Steinberger 2026-05-05 05:34:33 +01:00
parent 09b2efbcaa
commit 3031a34ff2
No known key found for this signature in database
5 changed files with 139 additions and 6 deletions

View File

@ -35,6 +35,7 @@
### Fixed
- Auth: retry transient websocket drops before QR or phone pairing completes.
- Auth: propagate QR channel setup errors and surface actionable QR pairing failures. (#100 — thanks @pmatheus)
- Build: fail cgo-disabled CLI builds at compile time instead of shipping a go-sqlite3 stub binary. (#194 — thanks @rajgopalv)
- Chats: resolve mapped historical `@lid` chat rows in `chats list/show` output. (#31, #89 — thanks @bhaskoro-muthohar and @alexph-dev)

View File

@ -17,6 +17,7 @@ wacli auth logout
- Default pairing prints a terminal QR code.
- `--qr-format text` prints the raw QR payload for external renderers.
- `--phone PHONE` uses WhatsApp phone-number pairing instead of QR pairing.
- Transient websocket drops before pairing completes are retried with a fresh QR/code.
- After pairing, auth runs bootstrap sync until idle unless `--follow` is set.
- Bootstrap sync honors `WACLI_SYNC_MAX_MESSAGES` and `WACLI_SYNC_MAX_DB_SIZE` to cap local history growth.
- `auth status` reports whether the local store is authenticated.

View File

@ -28,6 +28,8 @@ type fakeWA struct {
handlers map[uint32]func(interface{})
connectEvents []interface{}
connectErrs []error
connectCalls int
connectDelay time.Duration
downloadDelay time.Duration
@ -78,14 +80,29 @@ func (f *fakeWA) IsConnected() bool {
func (f *fakeWA) Connect(ctx context.Context, opts wa.ConnectOptions) error {
f.mu.Lock()
f.connectCalls++
authed := f.authed
var connectErr error
if len(f.connectErrs) > 0 {
connectErr = f.connectErrs[0]
f.connectErrs = f.connectErrs[1:]
}
f.connected = true
eventsToEmit := append([]interface{}{}, f.connectEvents...)
f.mu.Unlock()
if !authed && !opts.AllowQR {
f.mu.Lock()
f.connected = false
f.mu.Unlock()
return fmt.Errorf("not authenticated; run `wacli auth`")
}
if connectErr != nil {
f.mu.Lock()
f.connected = false
f.mu.Unlock()
return connectErr
}
if f.connectDelay > 0 {
select {
case <-time.After(f.connectDelay):

View File

@ -2,6 +2,7 @@ package app
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
@ -14,6 +15,8 @@ import (
"go.mau.fi/whatsmeow/types"
)
// maxAuthConnectAttempts is the total number of Connect attempts
// connectForSync makes when QR or phone-number pairing is enabled;
// without pairing it makes a single attempt.
const maxAuthConnectAttempts = 3
type SyncMode string
const (
@ -92,12 +95,7 @@ func (a *App) Sync(ctx context.Context, opts SyncOptions) (SyncResult, error) {
defer stopMedia()
}
if err := a.wa.Connect(syncCtx, wa.ConnectOptions{
AllowQR: opts.AllowQR,
OnQRCode: opts.OnQRCode,
PairPhoneNumber: opts.PairPhoneNumber,
OnPairCode: opts.OnPairCode,
}); err != nil {
if err := a.connectForSync(syncCtx, opts); err != nil {
return SyncResult{}, err
}
lastEvent.Store(nowUTC().UnixNano())
@ -133,6 +131,62 @@ func (a *App) Sync(ctx context.Context, opts SyncOptions) (SyncResult, error) {
return SyncResult{MessagesStored: messagesStored.Load()}, nil
}
// connectForSync connects the WhatsApp client using the pairing options
// carried by opts.
//
// When pairing is enabled (AllowQR is set or PairPhoneNumber is non-empty)
// a failed Connect is retried up to maxAuthConnectAttempts times in total,
// provided the error is classified as retryable by
// isRetryableAuthConnectError and ctx has not ended. Otherwise exactly one
// attempt is made. Between attempts a warning is written to stderr and the
// call waits for authConnectRetryDelay(attempt); if ctx ends during that
// wait, ctx.Err() is returned.
func (a *App) connectForSync(ctx context.Context, opts SyncOptions) error {
	pairing := opts.AllowQR || opts.PairPhoneNumber != ""
	maxTries := 1
	if pairing {
		maxTries = maxAuthConnectAttempts
	}

	connectOpts := wa.ConnectOptions{
		AllowQR:         opts.AllowQR,
		OnQRCode:        opts.OnQRCode,
		PairPhoneNumber: opts.PairPhoneNumber,
		OnPairCode:      opts.OnPairCode,
	}

	for try := 1; try <= maxTries; try++ {
		err := a.wa.Connect(ctx, connectOpts)
		switch {
		case err == nil:
			return nil
		case try == maxTries, ctx.Err() != nil, !isRetryableAuthConnectError(err):
			// Out of attempts, caller gave up, or the failure is not transient.
			return err
		}

		fmt.Fprintf(os.Stderr, "warning: auth connection dropped before pairing completed; retrying (%d/%d)\n", try+1, maxTries)

		// Back off before the next attempt, but stop waiting if ctx ends.
		select {
		case <-time.After(authConnectRetryDelay(try)):
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}
// authConnectRetryDelay returns the pause before the next connect attempt:
// 500ms multiplied by the number of the attempt that just failed.
func authConnectRetryDelay(attempt int) time.Duration {
	const step = 500 * time.Millisecond
	return step * time.Duration(attempt)
}
// isRetryableAuthConnectError reports whether err looks like a transient
// connection failure worth retrying.
//
// A nil error, or one that wraps context.Canceled or
// context.DeadlineExceeded, is never retryable. Any other error is
// retryable when its lower-cased message contains one of the known
// transient markers listed below.
func isRetryableAuthConnectError(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded):
		return false
	}

	markers := [...]string{
		"qr code timed out",
		"qr channel closed",
		"websocket",
		"failed to read frame header",
		"connection reset",
		"broken pipe",
		"i/o timeout",
		"eof",
	}

	// Matching is case-insensitive: the message is lower-cased once and the
	// markers are all lower-case.
	lowered := strings.ToLower(err.Error())
	for i := range markers {
		if strings.Contains(lowered, markers[i]) {
			return true
		}
	}
	return false
}
func (a *App) checkSyncStorageLimits(opts SyncOptions) error {
if opts.MaxMessages > 0 {
count, err := a.db.CountMessages()

View File

@ -633,3 +633,63 @@ func TestSyncOnceIdleExitStartsAfterConnected(t *testing.T) {
t.Fatalf("expected idle timer to start after connect, exited after %s", elapsed)
}
}
// TestSyncRetriesTransientAuthConnectFailure checks that, with QR pairing
// allowed, a single queued "QR code timed out" connect error does not fail
// Sync and results in exactly two Connect calls.
func TestSyncRetriesTransientAuthConnectFailure(t *testing.T) {
	testApp := newTestApp(t)
	fake := newFakeWA()
	fake.authed = false
	fake.connectErrs = []error{fmt.Errorf("QR code timed out; run `wacli auth` again")}
	testApp.wa = fake

	// Bound the run so a stuck retry loop fails the test instead of hanging.
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	syncOpts := SyncOptions{
		Mode:     SyncModeOnce,
		AllowQR:  true,
		IdleExit: 10 * time.Millisecond,
	}
	if _, err := testApp.Sync(ctx, syncOpts); err != nil {
		t.Fatalf("Sync: %v", err)
	}

	if got := fake.connectCalls; got != 2 {
		t.Fatalf("connect calls = %d, want 2", got)
	}
}
// TestSyncDoesNotRetryTransientConnectFailureOutsideAuthFlow checks that the
// same "QR code timed out" error is returned after a single Connect call
// when AllowQR is false.
func TestSyncDoesNotRetryTransientConnectFailureOutsideAuthFlow(t *testing.T) {
	testApp := newTestApp(t)
	fake := newFakeWA()
	fake.connectErrs = []error{fmt.Errorf("QR code timed out; run `wacli auth` again")}
	testApp.wa = fake

	syncOpts := SyncOptions{
		Mode:    SyncModeOnce,
		AllowQR: false,
	}
	if _, err := testApp.Sync(context.Background(), syncOpts); err == nil {
		t.Fatalf("expected connect error")
	}

	if got := fake.connectCalls; got != 1 {
		t.Fatalf("connect calls = %d, want 1", got)
	}
}
// TestSyncDoesNotRetryNonTransientAuthConnectFailure checks that a connect
// error whose message matches none of the retryable markers ("QR pairing
// failed: bad code") is returned after one Connect call even with QR
// pairing allowed.
func TestSyncDoesNotRetryNonTransientAuthConnectFailure(t *testing.T) {
	testApp := newTestApp(t)
	fake := newFakeWA()
	fake.authed = false
	fake.connectErrs = []error{fmt.Errorf("QR pairing failed: bad code")}
	testApp.wa = fake

	syncOpts := SyncOptions{
		Mode:    SyncModeOnce,
		AllowQR: true,
	}
	_, err := testApp.Sync(context.Background(), syncOpts)

	// The original pairing error must surface unchanged enough to identify it.
	sawPairingErr := err != nil && strings.Contains(err.Error(), "bad code")
	if !sawPairingErr {
		t.Fatalf("expected pairing error, got %v", err)
	}

	if got := fake.connectCalls; got != 1 {
		t.Fatalf("connect calls = %d, want 1", got)
	}
}