From e41945159b1d15e2729cfbbef9360494abe92b35 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 8 May 2026 14:18:34 +0100 Subject: [PATCH] feat: fetch Google contact avatars --- CHANGELOG.md | 2 +- README.md | 9 +- internal/cli/cli.go | 3 +- internal/google/gog.go | 184 +++++++++++++++++++++++++++++++++++- internal/google/gog_test.go | 171 +++++++++++++++++++++++++++++++++ 5 files changed, 363 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54bd449..5aebeff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,6 @@ - Initial `clawdex` CLI with markdown-backed people, timestamped notes, search, timeline, Git helpers, vCard export, and repair for damaged frontmatter. - Added Apple Contacts import on macOS, Google Contacts import through `gog`, Discord DM backfill through Discrawl, and X/Twitter DM backfill through Birdclaw. -- Added local avatar support with manual avatar commands, Apple thumbnail backfill, avatar repair checks, and optional vCard `PHOTO` export. +- Added local avatar support with manual avatar commands, Apple and Google avatar backfill, avatar repair checks, and optional vCard `PHOTO` export. - Added CI with lint, tests, 90% coverage enforcement, race tests, dependency checks, secret scanning, and GoReleaser snapshot validation. - Added GoReleaser config and release workflow that publishes cross-platform binaries and dispatches the Homebrew tap formula updater. diff --git a/README.md b/README.md index 3fb83d9..d195581 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Apple and Google imports write only to the local markdown data repo. clawdex import apple --dry-run clawdex import apple --avatars clawdex import google --account steipete@gmail.com --dry-run +clawdex import google --account steipete@gmail.com --avatars --dry-run clawdex import birdclaw --min-messages 4 --dry-run clawdex import discrawl --min-messages 4 --dry-run ``` @@ -73,9 +74,11 @@ clawdex import discrawl --min-messages 4 --dry-run Apple direct import uses macOS `Contacts.framework`. Linux builds still support markdown, notes, search, Git, Google via `gog`, and vCard export. -Avatar imports are opt-in with `--avatars`. clawdex stores thumbnails as local -files under each person directory and records only metadata in `person.md`. -Manual avatars are not overwritten by Apple/Google imports. +Avatar imports are opt-in with `--avatars`. Apple reads thumbnails from +Contacts.framework. Google uses `gog contacts raw --person-fields photos`, +fetches the selected photo URL bytes, then stores thumbnails as local files +under each person directory and records only metadata in `person.md`. Manual +avatars are not overwritten by Apple/Google imports. Birdclaw and Discrawl DM imports read local archives only. They import DM conversations with more than `--min-messages` messages, add source-specific diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 0dd3ede..21b20f5 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -443,6 +443,7 @@ func (c *ImportAppleCmd) Run(r *Runtime) error { type ImportGoogleCmd struct { Account string `name:"account" help:"Google account email"` + Avatars bool `name:"avatars" help:"Fetch Google contact avatar bytes through gog raw photo URLs"` } func (c *ImportGoogleCmd) Run(r *Runtime) error { @@ -450,7 +451,7 @@ func (c *ImportGoogleCmd) Run(r *Runtime) error { if account == "" { account = r.cfg.Google.DefaultAccount } - contacts, err := (google.GogAdapter{}).ListContacts(r.ctx, account) + contacts, err := (google.GogAdapter{}).ListContactsWithOptions(r.ctx, account, google.Options{IncludeAvatars: c.Avatars}) if err != nil { return err } diff --git a/internal/google/gog.go b/internal/google/gog.go index 4fb1a50..85a8196 100644 --- a/internal/google/gog.go +++ b/internal/google/gog.go @@ -5,17 +5,40 @@ import ( "encoding/json" "errors" "fmt" + "io" + "net/http" "os/exec" "strings" + "sync" + "time" "github.com/openclaw/clawdex/internal/model" ) +const ( + defaultAvatarConcurrency = 4 + maxAvatarConcurrency = 8 + maxAvatarBytes = 10 << 20 + avatarLookupTimeout = 20 * time.Second +) + +type Options struct { + IncludeAvatars bool + AvatarConcurrency int +} + +type AvatarFetchFunc func(context.Context, string) (model.SourceAvatar, error) + type GogAdapter struct { - Binary string + Binary string + FetchAvatar AvatarFetchFunc } func (g GogAdapter) ListContacts(ctx context.Context, account string) ([]model.SourceContact, error) { + return g.ListContactsWithOptions(ctx, account, Options{}) +} + +func (g GogAdapter) ListContactsWithOptions(ctx context.Context, account string, opts Options) ([]model.SourceContact, error) { binary := g.Binary if binary == "" { binary = "gog" @@ -46,12 +69,99 @@ func (g GogAdapter) ListContacts(ctx context.Context, account string) ([]model.S } out = append(out, contacts...) if nextPage == "" { + if opts.IncludeAvatars { + g.attachAvatars(ctx, binary, account, out, opts.avatarConcurrency()) + } return out, nil } page = nextPage } } +func (o Options) avatarConcurrency() int { + if o.AvatarConcurrency <= 0 { + return defaultAvatarConcurrency + } + if o.AvatarConcurrency > maxAvatarConcurrency { + return maxAvatarConcurrency + } + return o.AvatarConcurrency +} + +func (g GogAdapter) attachAvatars(ctx context.Context, binary string, account string, contacts []model.SourceContact, concurrency int) { + if len(contacts) == 0 { + return + } + if concurrency < 1 { + concurrency = 1 + } + if concurrency > len(contacts) { + concurrency = len(contacts) + } + jobs := make(chan int) + var wg sync.WaitGroup + for range concurrency { + wg.Go(func() { + for i := range jobs { + g.attachAvatar(ctx, binary, account, &contacts[i]) + } + }) + } + for i := range contacts { + select { + case <-ctx.Done(): + close(jobs) + wg.Wait() + return + case jobs <- i: + } + } + close(jobs) + wg.Wait() +} + +func (g GogAdapter) attachAvatar(ctx context.Context, binary string, account string, contact *model.SourceContact) { + if ctx.Err() != nil || contact == nil { + return + } + if strings.TrimSpace(contact.ExternalID) == "" || contact.Avatar != nil { + return + } + lookupCtx, cancel := context.WithTimeout(ctx, avatarLookupTimeout) + defer cancel() + raw, err := g.rawContact(lookupCtx, binary, account, contact.ExternalID) + if err != nil { + return + } + url, err := parseGogPhotoURL(raw) + if err != nil || url == "" { + return + } + avatar, err := g.fetchAvatar(lookupCtx, url) + if err != nil || len(avatar.Data) == 0 { + return + } + avatar.URL = url + contact.Avatar = &avatar +} + +func (g GogAdapter) rawContact(ctx context.Context, binary string, account string, identifier string) ([]byte, error) { + args := []string{"--no-input", "contacts", "raw", identifier, "--person-fields", "photos", "--json"} + if strings.TrimSpace(account) != "" { + args = append([]string{"--account", account}, args...) + } + // #nosec G204 -- the adapter intentionally shells to a configured gog binary without using a shell. + cmd := exec.CommandContext(ctx, binary, args...) + return cmd.Output() +} + +func (g GogAdapter) fetchAvatar(ctx context.Context, url string) (model.SourceAvatar, error) { + if g.FetchAvatar != nil { + return g.FetchAvatar(ctx, url) + } + return fetchAvatarURL(ctx, url) +} + type gogEnvelope struct { Contacts []gogPerson `json:"contacts"` Results []gogPerson `json:"results"` @@ -144,6 +254,78 @@ func convertPeople(people []gogPerson) []model.SourceContact { return out } +type gogPhotoEnvelope struct { + Contact *gogPhotoPerson `json:"contact"` + Person *gogPhotoPerson `json:"person"` + Photos []gogPhoto `json:"photos"` +} + +type gogPhotoPerson struct { + Photos []gogPhoto `json:"photos"` +} + +type gogPhoto struct { + URL string `json:"url"` + Metadata struct { + Primary bool `json:"primary"` + } `json:"metadata"` +} + +func parseGogPhotoURL(data []byte) (string, error) { + var env gogPhotoEnvelope + if err := json.Unmarshal(data, &env); err != nil { + return "", err + } + photos := env.Photos + if len(photos) == 0 && env.Contact != nil { + photos = env.Contact.Photos + } + if len(photos) == 0 && env.Person != nil { + photos = env.Person.Photos + } + for _, photo := range photos { + if photo.Metadata.Primary && strings.TrimSpace(photo.URL) != "" { + return strings.TrimSpace(photo.URL), nil + } + } + for _, photo := range photos { + if strings.TrimSpace(photo.URL) != "" { + return strings.TrimSpace(photo.URL), nil + } + } + return "", nil +} + +func fetchAvatarURL(ctx context.Context, url string) (model.SourceAvatar, error) { + if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") { + return model.SourceAvatar{}, fmt.Errorf("unsupported avatar URL: %s", url) + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return model.SourceAvatar{}, err + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return model.SourceAvatar{}, err + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return model.SourceAvatar{}, fmt.Errorf("avatar fetch failed: HTTP %d", resp.StatusCode) + } + data, err := io.ReadAll(io.LimitReader(resp.Body, maxAvatarBytes+1)) + if err != nil { + return model.SourceAvatar{}, err + } + if len(data) > maxAvatarBytes { + return model.SourceAvatar{}, errors.New("avatar too large") + } + mime := strings.TrimSpace(resp.Header.Get("Content-Type")) + if idx := strings.IndexByte(mime, ';'); idx >= 0 { + mime = strings.TrimSpace(mime[:idx]) + } + return model.SourceAvatar{Data: data, MIME: mime, URL: url}, nil +} + func firstNonEmpty(values ...string) string { for _, value := range values { if strings.TrimSpace(value) != "" { diff --git a/internal/google/gog_test.go b/internal/google/gog_test.go index d65700a..3df299b 100644 --- a/internal/google/gog_test.go +++ b/internal/google/gog_test.go @@ -1,12 +1,17 @@ package google import ( + "context" "encoding/json" + "net/http" + "net/http/httptest" "os" "path/filepath" "runtime" "strings" "testing" + + "github.com/openclaw/clawdex/internal/model" ) func TestParseGogContactsEnvelopeAndArray(t *testing.T) { @@ -57,6 +62,107 @@ func TestGogAdapterListContactsUsesNoInput(t *testing.T) { } } +func TestGogAdapterListContactsWithAvatars(t *testing.T) { + dir := t.TempDir() + bin := filepath.Join(dir, "gog") + if runtime.GOOS == "windows" { + bin += ".bat" + } + script := "#!/bin/sh\nprintf '%s\\n' \"$@\" >> \"" + filepath.Join(dir, "args") + "\"\ncase \"$*\" in *\"contacts raw\"*) printf '%s\\n' '{\"photos\":[{\"url\":\"https://example.com/secondary.jpg\"},{\"url\":\"https://example.com/grace.jpg\",\"metadata\":{\"primary\":true}}]}' ;; *) printf '%s\\n' '{\"contacts\":[{\"resourceName\":\"people/g1\",\"name\":\"Grace\",\"email\":\"grace@example.com\"},{\"name\":\"No Resource\"}]}' ;; esac\n" + if err := os.WriteFile(bin, []byte(script), 0o700); err != nil { + t.Fatal(err) + } + var fetched string + adapter := GogAdapter{ + Binary: bin, + FetchAvatar: func(_ context.Context, url string) (model.SourceAvatar, error) { + fetched = url + return model.SourceAvatar{Data: []byte("avatar"), MIME: "image/jpeg"}, nil + }, + } + contacts, err := adapter.ListContactsWithOptions(t.Context(), "ada@example.com", Options{IncludeAvatars: true}) + if err != nil { + t.Fatal(err) + } + if len(contacts) != 2 { + t.Fatalf("contacts = %#v", contacts) + } + if contacts[0].Avatar == nil || string(contacts[0].Avatar.Data) != "avatar" { + t.Fatalf("avatar = %#v", contacts[0].Avatar) + } + if contacts[1].Avatar != nil { + t.Fatalf("unexpected avatar for missing resource = %#v", contacts[1].Avatar) + } + if fetched != "https://example.com/grace.jpg" { + t.Fatalf("fetched = %q", fetched) + } + args, err := os.ReadFile(filepath.Join(dir, "args")) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(args), "contacts\nraw\npeople/g1\n--person-fields\nphotos") { + t.Fatalf("missing raw photo args = %s", args) + } +} + +func TestGogAdapterAvatarFailuresAreBestEffort(t *testing.T) { + dir := t.TempDir() + bin := filepath.Join(dir, "gog") + if err := os.WriteFile(bin, []byte("#!/bin/sh\ncase \"$*\" in *badraw*) exit 5 ;; *badjson*) printf '%s\\n' '{' ;; *empty*) printf '%s\\n' '{}' ;; *) printf '%s\\n' '{\"photos\":[{\"url\":\"https://example.com/avatar.jpg\"}]}' ;; esac\n"), 0o700); err != nil { + t.Fatal(err) + } + contacts := []model.SourceContact{ + {Source: "google", ExternalID: "people/badraw", Name: "Bad Raw"}, + {Source: "google", ExternalID: "people/badjson", Name: "Bad JSON"}, + {Source: "google", ExternalID: "people/empty", Name: "Empty"}, + {Source: "google", ExternalID: "people/fetcherr", Name: "Fetch Err"}, + {Source: "google", ExternalID: "people/ok", Name: "OK"}, + } + adapter := GogAdapter{ + Binary: bin, + FetchAvatar: func(_ context.Context, url string) (model.SourceAvatar, error) { + if strings.Contains(url, "avatar.jpg") { + return model.SourceAvatar{}, os.ErrPermission + } + return model.SourceAvatar{Data: []byte("avatar")}, nil + }, + } + adapter.attachAvatars(t.Context(), bin, "", contacts, 1) + for _, contact := range contacts { + if contact.Avatar != nil { + t.Fatalf("unexpected avatar after failures = %#v", contacts) + } + } +} + +func TestAvatarConcurrencyGuards(t *testing.T) { + for _, tc := range []struct { + options Options + want int + }{ + {options: Options{}, want: defaultAvatarConcurrency}, + {options: Options{AvatarConcurrency: -1}, want: defaultAvatarConcurrency}, + {options: Options{AvatarConcurrency: 2}, want: 2}, + {options: Options{AvatarConcurrency: maxAvatarConcurrency + 1}, want: maxAvatarConcurrency}, + } { + if got := tc.options.avatarConcurrency(); got != tc.want { + t.Fatalf("avatarConcurrency(%#v) = %d want %d", tc.options, got, tc.want) + } + } +} + +func TestAttachAvatarsContextGuards(t *testing.T) { + adapter := GogAdapter{} + adapter.attachAvatars(t.Context(), "gog", "", nil, 0) + ctx, cancel := context.WithCancel(t.Context()) + cancel() + contacts := []model.SourceContact{{Source: "google", ExternalID: "people/1", Name: "Canceled"}} + adapter.attachAvatars(ctx, "gog", "", contacts, 0) + if contacts[0].Avatar != nil { + t.Fatalf("unexpected avatar with canceled context = %#v", contacts[0].Avatar) + } +} + func TestGogAdapterListContactsCommandFailure(t *testing.T) { dir := t.TempDir() bin := filepath.Join(dir, "gog") @@ -85,3 +191,68 @@ func TestParseGogContactsRejectsInvalidJSON(t *testing.T) { t.Fatalf("got = %#v", got) } } + +func TestParseGogPhotoURL(t *testing.T) { + for _, tc := range []struct { + name string + json string + want string + }{ + {name: "direct primary", json: `{"photos":[{"url":"https://example.com/a.jpg"},{"url":"https://example.com/b.jpg","metadata":{"primary":true}}]}`, want: "https://example.com/b.jpg"}, + {name: "contact wrapper", json: `{"contact":{"photos":[{"url":"https://example.com/c.jpg"}]}}`, want: "https://example.com/c.jpg"}, + {name: "person wrapper", json: `{"person":{"photos":[{"url":" https://example.com/d.jpg "}]}}`, want: "https://example.com/d.jpg"}, + {name: "none", json: `{}`, want: ""}, + } { + t.Run(tc.name, func(t *testing.T) { + got, err := parseGogPhotoURL([]byte(tc.json)) + if err != nil { + t.Fatal(err) + } + if got != tc.want { + t.Fatalf("got %q want %q", got, tc.want) + } + }) + } + if _, err := parseGogPhotoURL([]byte(`{`)); err == nil { + t.Fatal("expected invalid JSON error") + } +} + +func TestFetchAvatarURL(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/missing" { + http.NotFound(w, r) + return + } + if r.URL.Path == "/too-large" { + _, _ = w.Write(make([]byte, maxAvatarBytes+1)) + return + } + w.Header().Set("Content-Type", "image/png; charset=utf-8") + _, _ = w.Write([]byte("png")) + })) + defer server.Close() + + avatar, err := fetchAvatarURL(t.Context(), server.URL+"/avatar.png") + if err != nil { + t.Fatal(err) + } + if string(avatar.Data) != "png" || avatar.MIME != "image/png" || !strings.HasSuffix(avatar.URL, "/avatar.png") { + t.Fatalf("avatar = %#v", avatar) + } + if _, err := fetchAvatarURL(t.Context(), "file:///tmp/avatar.png"); err == nil { + t.Fatal("expected unsupported URL error") + } + if _, err := fetchAvatarURL(t.Context(), "http://[::1"); err == nil { + t.Fatal("expected bad URL error") + } + if _, err := fetchAvatarURL(t.Context(), server.URL+"/missing"); err == nil { + t.Fatal("expected non-2xx error") + } + if _, err := fetchAvatarURL(t.Context(), server.URL+"/too-large"); err == nil { + t.Fatal("expected too large error") + } + if avatar, err := (GogAdapter{}).fetchAvatar(t.Context(), server.URL+"/avatar.png"); err != nil || string(avatar.Data) != "png" { + t.Fatalf("default fetchAvatar = %#v err=%v", avatar, err) + } +}