feat: fetch Google contact avatars

This commit is contained in:
Peter Steinberger 2026-05-08 14:18:34 +01:00
parent 458083fa8a
commit e41945159b
No known key found for this signature in database
5 changed files with 363 additions and 6 deletions

View File

@ -4,6 +4,6 @@
- Initial `clawdex` CLI with markdown-backed people, timestamped notes, search, timeline, Git helpers, vCard export, and repair for damaged frontmatter.
- Added Apple Contacts import on macOS, Google Contacts import through `gog`, Discord DM backfill through Discrawl, and X/Twitter DM backfill through Birdclaw.
- Added local avatar support with manual avatar commands, Apple thumbnail backfill, avatar repair checks, and optional vCard `PHOTO` export.
- Added local avatar support with manual avatar commands, Apple and Google avatar backfill, avatar repair checks, and optional vCard `PHOTO` export.
- Added CI with lint, tests, 90% coverage enforcement, race tests, dependency checks, secret scanning, and GoReleaser snapshot validation.
- Added GoReleaser config and release workflow that publishes cross-platform binaries and dispatches the Homebrew tap formula updater.

View File

@ -66,6 +66,7 @@ Apple and Google imports write only to the local markdown data repo.
clawdex import apple --dry-run
clawdex import apple --avatars
clawdex import google --account steipete@gmail.com --dry-run
clawdex import google --account steipete@gmail.com --avatars --dry-run
clawdex import birdclaw --min-messages 4 --dry-run
clawdex import discrawl --min-messages 4 --dry-run
```
@ -73,9 +74,11 @@ clawdex import discrawl --min-messages 4 --dry-run
Apple direct import uses macOS `Contacts.framework`. Linux builds still support
markdown, notes, search, Git, Google via `gog`, and vCard export.
Avatar imports are opt-in with `--avatars`. clawdex stores thumbnails as local
files under each person directory and records only metadata in `person.md`.
Manual avatars are not overwritten by Apple/Google imports.
Avatar imports are opt-in with `--avatars`. Apple reads thumbnails from
Contacts.framework. Google looks up each contact with
`gog contacts raw --person-fields photos` and downloads the selected photo URL.
In both cases clawdex stores thumbnails as local files under each person
directory and records only metadata in `person.md`. Manual avatars are not
overwritten by Apple/Google imports.
Birdclaw and Discrawl DM imports read local archives only. They import DM
conversations with more than `--min-messages` messages, add source-specific

View File

@ -443,6 +443,7 @@ func (c *ImportAppleCmd) Run(r *Runtime) error {
// ImportGoogleCmd holds the flags of the Google contacts import command.
type ImportGoogleCmd struct {
// Account is the Google account email. NOTE(review): Run appears to fall back
// to the configured default account when this is empty — confirm in Run.
Account string `name:"account" help:"Google account email"`
// Avatars opts in to avatar fetching; Run forwards it to the adapter as
// google.Options.IncludeAvatars.
Avatars bool `name:"avatars" help:"Fetch Google contact avatar bytes through gog raw photo URLs"`
}
func (c *ImportGoogleCmd) Run(r *Runtime) error {
@ -450,7 +451,7 @@ func (c *ImportGoogleCmd) Run(r *Runtime) error {
if account == "" {
account = r.cfg.Google.DefaultAccount
}
contacts, err := (google.GogAdapter{}).ListContacts(r.ctx, account)
contacts, err := (google.GogAdapter{}).ListContactsWithOptions(r.ctx, account, google.Options{IncludeAvatars: c.Avatars})
if err != nil {
return err
}

View File

@ -5,17 +5,40 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"os/exec"
"strings"
"sync"
"time"
"github.com/openclaw/clawdex/internal/model"
)
// Tuning constants for the optional Google avatar backfill.
const (
// defaultAvatarConcurrency is the worker count used when
// Options.AvatarConcurrency is zero or negative.
defaultAvatarConcurrency = 4
// maxAvatarConcurrency is the upper bound applied to Options.AvatarConcurrency.
maxAvatarConcurrency = 8
// maxAvatarBytes caps a downloaded avatar body at 10 MiB; larger responses
// are rejected rather than truncated.
maxAvatarBytes = 10 << 20
// avatarLookupTimeout bounds one contact's whole avatar attempt: the gog raw
// lookup and the photo download share the same deadline.
avatarLookupTimeout = 20 * time.Second
)
// Options tunes GogAdapter.ListContactsWithOptions. The zero value lists
// contacts without fetching avatars.
type Options struct {
// IncludeAvatars enables the per-contact avatar lookup, which runs once the
// last page of contacts has been listed.
IncludeAvatars bool
// AvatarConcurrency is the number of parallel avatar workers. Values <= 0
// select defaultAvatarConcurrency; values above maxAvatarConcurrency are
// clamped to it.
AvatarConcurrency int
}
// AvatarFetchFunc downloads the avatar found at the given URL. It is the type
// of GogAdapter.FetchAvatar, which lets callers (for example tests) replace
// the default HTTP download.
type AvatarFetchFunc func(context.Context, string) (model.SourceAvatar, error)
// GogAdapter reads Google contacts by shelling out to the gog CLI.
// Binary is the executable to run; when it is empty the adapter uses "gog".
type GogAdapter struct {
Binary string
Binary string
// FetchAvatar overrides how avatar bytes are downloaded; when nil the adapter
// falls back to fetchAvatarURL.
FetchAvatar AvatarFetchFunc
}
// ListContacts lists the account's contacts using the zero-value Options,
// which means no avatar lookup is performed.
func (g GogAdapter) ListContacts(ctx context.Context, account string) ([]model.SourceContact, error) {
	var defaults Options
	return g.ListContactsWithOptions(ctx, account, defaults)
}
func (g GogAdapter) ListContactsWithOptions(ctx context.Context, account string, opts Options) ([]model.SourceContact, error) {
binary := g.Binary
if binary == "" {
binary = "gog"
@ -46,12 +69,99 @@ func (g GogAdapter) ListContacts(ctx context.Context, account string) ([]model.S
}
out = append(out, contacts...)
if nextPage == "" {
if opts.IncludeAvatars {
g.attachAvatars(ctx, binary, account, out, opts.avatarConcurrency())
}
return out, nil
}
page = nextPage
}
}
// avatarConcurrency resolves the effective avatar worker count: the default
// for non-positive requests, the maximum for oversized requests, and the
// requested value otherwise.
func (o Options) avatarConcurrency() int {
	switch requested := o.AvatarConcurrency; {
	case requested <= 0:
		return defaultAvatarConcurrency
	case requested > maxAvatarConcurrency:
		return maxAvatarConcurrency
	default:
		return requested
	}
}
// attachAvatars fills in avatars for contacts in place using a bounded pool of
// workers. Each worker receives slice indexes and calls attachAvatar on that
// element, so no two workers touch the same contact. The worker count is
// clamped to the range [1, len(contacts)]. If ctx is canceled while indexes
// are still being handed out, dispatch stops; either way the function returns
// only after every worker has finished.
func (g GogAdapter) attachAvatars(ctx context.Context, binary string, account string, contacts []model.SourceContact, concurrency int) {
	total := len(contacts)
	if total == 0 {
		return
	}
	workers := min(max(concurrency, 1), total)

	queue := make(chan int)
	var pool sync.WaitGroup
	for range workers {
		pool.Go(func() {
			for idx := range queue {
				g.attachAvatar(ctx, binary, account, &contacts[idx])
			}
		})
	}
	// Deferred calls run last-in first-out: the queue is closed first so the
	// workers' range loops end, then the pool is awaited.
	defer pool.Wait()
	defer close(queue)

	for idx := range contacts {
		select {
		case <-ctx.Done():
			return
		case queue <- idx:
		}
	}
}
// attachAvatar makes a best-effort attempt to set contact.Avatar. It does
// nothing when ctx is already done, when contact is nil, when the contact has
// no external ID, or when an avatar is already present. Any failure along the
// way (gog lookup, JSON parsing, missing photo URL, download error, empty
// body) leaves the contact unchanged; no error is reported.
func (g GogAdapter) attachAvatar(ctx context.Context, binary string, account string, contact *model.SourceContact) {
	if ctx.Err() != nil || contact == nil {
		return
	}
	hasID := strings.TrimSpace(contact.ExternalID) != ""
	if !hasID || contact.Avatar != nil {
		return
	}

	// One deadline covers both the gog lookup and the photo download.
	deadlineCtx, release := context.WithTimeout(ctx, avatarLookupTimeout)
	defer release()

	payload, err := g.rawContact(deadlineCtx, binary, account, contact.ExternalID)
	if err != nil {
		return
	}
	photoURL, err := parseGogPhotoURL(payload)
	if err != nil || photoURL == "" {
		return
	}
	if fetched, err := g.fetchAvatar(deadlineCtx, photoURL); err == nil && len(fetched.Data) > 0 {
		// Record the URL that was actually selected, whatever the fetcher set.
		fetched.URL = photoURL
		contact.Avatar = &fetched
	}
}
// rawContact runs `gog contacts raw <identifier> --person-fields photos --json`
// (with --no-input, and prefixed by --account when an account is given) and
// returns the command's standard output.
func (g GogAdapter) rawContact(ctx context.Context, binary string, account string, identifier string) ([]byte, error) {
	var argv []string
	if strings.TrimSpace(account) != "" {
		argv = append(argv, "--account", account)
	}
	argv = append(argv, "--no-input", "contacts", "raw", identifier, "--person-fields", "photos", "--json")
	// #nosec G204 -- the adapter intentionally shells to a configured gog binary without using a shell.
	return exec.CommandContext(ctx, binary, argv...).Output()
}
// fetchAvatar downloads the avatar at url through the adapter's FetchAvatar
// hook, or through fetchAvatarURL when no hook is configured.
func (g GogAdapter) fetchAvatar(ctx context.Context, url string) (model.SourceAvatar, error) {
	fetch := g.FetchAvatar
	if fetch == nil {
		fetch = fetchAvatarURL
	}
	return fetch(ctx, url)
}
type gogEnvelope struct {
Contacts []gogPerson `json:"contacts"`
Results []gogPerson `json:"results"`
@ -144,6 +254,78 @@ func convertPeople(people []gogPerson) []model.SourceContact {
return out
}
// gogPhotoEnvelope covers the shapes accepted from `gog contacts raw --json`:
// photos at the top level, or nested under a "contact" or "person" object.
type gogPhotoEnvelope struct {
	Contact *gogPhotoPerson `json:"contact"`
	Person  *gogPhotoPerson `json:"person"`
	Photos  []gogPhoto      `json:"photos"`
}

// gogPhotoPerson is a person object reduced to its photo list.
type gogPhotoPerson struct {
	Photos []gogPhoto `json:"photos"`
}

// gogPhoto is a single photo entry of a person.
type gogPhoto struct {
	URL string `json:"url"`
	// Default is set by the Google People API on auto-generated placeholder
	// photos (as opposed to user-provided ones).
	Default  bool `json:"default"`
	Metadata struct {
		Primary bool `json:"primary"`
	} `json:"metadata"`
}

// parseGogPhotoURL extracts the avatar URL to download from raw gog JSON.
//
// The photo list is taken from the first non-empty source among the top-level
// "photos", "contact.photos" and "person.photos". Entries with a blank URL and
// entries flagged as Google default placeholders are ignored. Among the rest,
// the first primary photo wins; otherwise the first remaining photo is used.
// The returned URL is whitespace-trimmed. An empty string with a nil error
// means the contact has no usable photo; an error is returned only for
// invalid JSON.
func parseGogPhotoURL(data []byte) (string, error) {
	var env gogPhotoEnvelope
	if err := json.Unmarshal(data, &env); err != nil {
		return "", err
	}
	photos := env.Photos
	if len(photos) == 0 && env.Contact != nil {
		photos = env.Contact.Photos
	}
	if len(photos) == 0 && env.Person != nil {
		photos = env.Person.Photos
	}

	fallback := ""
	for _, photo := range photos {
		url := strings.TrimSpace(photo.URL)
		if url == "" || photo.Default {
			// Placeholder monograms are not real avatars; skip them so they
			// are never stored as a contact's picture.
			continue
		}
		if photo.Metadata.Primary {
			return url, nil
		}
		if fallback == "" {
			fallback = url
		}
	}
	return fallback, nil
}
// fetchAvatarURL downloads an avatar over HTTP(S) with the default client.
//
// Only URLs starting with "http://" or "https://" are accepted. The request
// is bound to ctx, so the caller's deadline applies. Non-2xx responses and
// bodies larger than maxAvatarBytes are errors. The returned avatar carries
// the body, the request URL and the media type: the Content-Type header
// without parameters, or, when the server sent no Content-Type, the type
// sniffed from the body if it is an image type (otherwise it stays empty).
func fetchAvatarURL(ctx context.Context, url string) (model.SourceAvatar, error) {
	if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") {
		return model.SourceAvatar{}, fmt.Errorf("unsupported avatar URL: %s", url)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return model.SourceAvatar{}, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return model.SourceAvatar{}, err
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return model.SourceAvatar{}, fmt.Errorf("avatar fetch failed: HTTP %d", resp.StatusCode)
	}
	// Read one byte past the limit so an oversized body is detected instead of
	// being silently truncated.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxAvatarBytes+1))
	if err != nil {
		return model.SourceAvatar{}, err
	}
	if len(data) > maxAvatarBytes {
		return model.SourceAvatar{}, errors.New("avatar too large")
	}
	// Drop media-type parameters such as "; charset=utf-8".
	mimeType, _, _ := strings.Cut(resp.Header.Get("Content-Type"), ";")
	mimeType = strings.TrimSpace(mimeType)
	if mimeType == "" {
		// No header: trust the sniffer only when it recognizes an image, so
		// unknown payloads keep the previous empty-MIME result.
		if sniffed := http.DetectContentType(data); strings.HasPrefix(sniffed, "image/") {
			mimeType = sniffed
		}
	}
	return model.SourceAvatar{Data: data, MIME: mimeType, URL: url}, nil
}
func firstNonEmpty(values ...string) string {
for _, value := range values {
if strings.TrimSpace(value) != "" {

View File

@ -1,12 +1,17 @@
package google
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"runtime"
"strings"
"testing"
"github.com/openclaw/clawdex/internal/model"
)
func TestParseGogContactsEnvelopeAndArray(t *testing.T) {
@ -57,6 +62,107 @@ func TestGogAdapterListContactsUsesNoInput(t *testing.T) {
}
}
// TestGogAdapterListContactsWithAvatars runs ListContactsWithOptions with
// IncludeAvatars against a fake gog script and a stubbed avatar fetcher. It
// checks that the primary photo URL is the one fetched, that only the contact
// with a resource name receives an avatar, and that gog was invoked with the
// raw photo arguments.
func TestGogAdapterListContactsWithAvatars(t *testing.T) {
dir := t.TempDir()
bin := filepath.Join(dir, "gog")
if runtime.GOOS == "windows" {
bin += ".bat"
}
// Fake gog: appends every argument (one per line) to an "args" file, answers
// "contacts raw" calls with two photos of which the second is primary, and
// answers every other call with a two-contact listing.
script := "#!/bin/sh\nprintf '%s\\n' \"$@\" >> \"" + filepath.Join(dir, "args") + "\"\ncase \"$*\" in *\"contacts raw\"*) printf '%s\\n' '{\"photos\":[{\"url\":\"https://example.com/secondary.jpg\"},{\"url\":\"https://example.com/grace.jpg\",\"metadata\":{\"primary\":true}}]}' ;; *) printf '%s\\n' '{\"contacts\":[{\"resourceName\":\"people/g1\",\"name\":\"Grace\",\"email\":\"grace@example.com\"},{\"name\":\"No Resource\"}]}' ;; esac\n"
if err := os.WriteFile(bin, []byte(script), 0o700); err != nil {
t.Fatal(err)
}
// The stub records the requested URL and returns fixed bytes, so no network
// access happens.
var fetched string
adapter := GogAdapter{
Binary: bin,
FetchAvatar: func(_ context.Context, url string) (model.SourceAvatar, error) {
fetched = url
return model.SourceAvatar{Data: []byte("avatar"), MIME: "image/jpeg"}, nil
},
}
contacts, err := adapter.ListContactsWithOptions(t.Context(), "ada@example.com", Options{IncludeAvatars: true})
if err != nil {
t.Fatal(err)
}
if len(contacts) != 2 {
t.Fatalf("contacts = %#v", contacts)
}
if contacts[0].Avatar == nil || string(contacts[0].Avatar.Data) != "avatar" {
t.Fatalf("avatar = %#v", contacts[0].Avatar)
}
// The second listed contact has no resourceName and must be left without an
// avatar.
if contacts[1].Avatar != nil {
t.Fatalf("unexpected avatar for missing resource = %#v", contacts[1].Avatar)
}
// The primary photo must win over the earlier non-primary one.
if fetched != "https://example.com/grace.jpg" {
t.Fatalf("fetched = %q", fetched)
}
args, err := os.ReadFile(filepath.Join(dir, "args"))
if err != nil {
t.Fatal(err)
}
if !strings.Contains(string(args), "contacts\nraw\npeople/g1\n--person-fields\nphotos") {
t.Fatalf("missing raw photo args = %s", args)
}
}
// TestGogAdapterAvatarFailuresAreBestEffort verifies that attachAvatars leaves
// contacts without an avatar, and does not fail, when the gog lookup exits
// non-zero, returns invalid JSON, returns no photos, or the download errors.
func TestGogAdapterAvatarFailuresAreBestEffort(t *testing.T) {
dir := t.TempDir()
bin := filepath.Join(dir, "gog")
// Fake gog keyed on the arguments: "badraw" exits with status 5, "badjson"
// prints truncated JSON, "empty" prints an empty object, and anything else
// prints a single photo whose URL ends in avatar.jpg.
if err := os.WriteFile(bin, []byte("#!/bin/sh\ncase \"$*\" in *badraw*) exit 5 ;; *badjson*) printf '%s\\n' '{' ;; *empty*) printf '%s\\n' '{}' ;; *) printf '%s\\n' '{\"photos\":[{\"url\":\"https://example.com/avatar.jpg\"}]}' ;; esac\n"), 0o700); err != nil {
t.Fatal(err)
}
contacts := []model.SourceContact{
{Source: "google", ExternalID: "people/badraw", Name: "Bad Raw"},
{Source: "google", ExternalID: "people/badjson", Name: "Bad JSON"},
{Source: "google", ExternalID: "people/empty", Name: "Empty"},
{Source: "google", ExternalID: "people/fetcherr", Name: "Fetch Err"},
{Source: "google", ExternalID: "people/ok", Name: "OK"},
}
// NOTE(review): every URL the fake script emits contains "avatar.jpg", so the
// stub always returns os.ErrPermission and its success branch is never
// reached; the "OK" contact therefore also ends up without an avatar.
adapter := GogAdapter{
Binary: bin,
FetchAvatar: func(_ context.Context, url string) (model.SourceAvatar, error) {
if strings.Contains(url, "avatar.jpg") {
return model.SourceAvatar{}, os.ErrPermission
}
return model.SourceAvatar{Data: []byte("avatar")}, nil
},
}
// A single worker keeps the processing order equal to the slice order.
adapter.attachAvatars(t.Context(), bin, "", contacts, 1)
for _, contact := range contacts {
if contact.Avatar != nil {
t.Fatalf("unexpected avatar after failures = %#v", contacts)
}
}
}
// TestAvatarConcurrencyGuards checks the clamping of Options.AvatarConcurrency:
// unset and negative values map to the default, in-range values pass through,
// and values above the maximum are capped.
func TestAvatarConcurrencyGuards(t *testing.T) {
	cases := []struct {
		options Options
		want    int
	}{
		{options: Options{}, want: defaultAvatarConcurrency},
		{options: Options{AvatarConcurrency: -1}, want: defaultAvatarConcurrency},
		{options: Options{AvatarConcurrency: 2}, want: 2},
		{options: Options{AvatarConcurrency: maxAvatarConcurrency + 1}, want: maxAvatarConcurrency},
	}
	for i := range cases {
		opts, expected := cases[i].options, cases[i].want
		got := opts.avatarConcurrency()
		if got == expected {
			continue
		}
		t.Fatalf("avatarConcurrency(%#v) = %d want %d", opts, got, expected)
	}
}
// TestAttachAvatarsContextGuards covers the two early exits of attachAvatars:
// an empty contact list, and a context that is already canceled.
func TestAttachAvatarsContextGuards(t *testing.T) {
	var adapter GogAdapter

	// Nil contacts with a zero concurrency must return without doing anything.
	adapter.attachAvatars(t.Context(), "gog", "", nil, 0)

	canceled, stop := context.WithCancel(t.Context())
	stop()

	people := []model.SourceContact{{Source: "google", ExternalID: "people/1", Name: "Canceled"}}
	adapter.attachAvatars(canceled, "gog", "", people, 0)
	if got := people[0].Avatar; got != nil {
		t.Fatalf("unexpected avatar with canceled context = %#v", got)
	}
}
func TestGogAdapterListContactsCommandFailure(t *testing.T) {
dir := t.TempDir()
bin := filepath.Join(dir, "gog")
@ -85,3 +191,68 @@ func TestParseGogContactsRejectsInvalidJSON(t *testing.T) {
t.Fatalf("got = %#v", got)
}
}
// TestParseGogPhotoURL exercises photo selection across the accepted JSON
// shapes (top-level photos, "contact" wrapper, "person" wrapper, no photos)
// and confirms that malformed JSON is reported as an error.
func TestParseGogPhotoURL(t *testing.T) {
	type scenario struct {
		name    string
		payload string
		want    string
	}
	scenarios := []scenario{
		{name: "direct primary", payload: `{"photos":[{"url":"https://example.com/a.jpg"},{"url":"https://example.com/b.jpg","metadata":{"primary":true}}]}`, want: "https://example.com/b.jpg"},
		{name: "contact wrapper", payload: `{"contact":{"photos":[{"url":"https://example.com/c.jpg"}]}}`, want: "https://example.com/c.jpg"},
		{name: "person wrapper", payload: `{"person":{"photos":[{"url":" https://example.com/d.jpg "}]}}`, want: "https://example.com/d.jpg"},
		{name: "none", payload: `{}`, want: ""},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			url, err := parseGogPhotoURL([]byte(sc.payload))
			switch {
			case err != nil:
				t.Fatal(err)
			case url != sc.want:
				t.Fatalf("got %q want %q", url, sc.want)
			}
		})
	}

	_, err := parseGogPhotoURL([]byte(`{`))
	if err == nil {
		t.Fatal("expected invalid JSON error")
	}
}
// TestFetchAvatarURL drives fetchAvatarURL against a local HTTP test server:
// one successful download (body, parameter-free MIME type and URL are
// checked), four failure modes (unsupported scheme, malformed URL, non-2xx
// status, oversized body), and the adapter's default fetch path.
func TestFetchAvatarURL(t *testing.T) {
	handler := func(w http.ResponseWriter, r *http.Request) {
		switch r.URL.Path {
		case "/missing":
			http.NotFound(w, r)
		case "/too-large":
			// One byte over the limit triggers the size check.
			_, _ = w.Write(make([]byte, maxAvatarBytes+1))
		default:
			w.Header().Set("Content-Type", "image/png; charset=utf-8")
			_, _ = w.Write([]byte("png"))
		}
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	okURL := srv.URL + "/avatar.png"
	got, err := fetchAvatarURL(t.Context(), okURL)
	if err != nil {
		t.Fatal(err)
	}
	if string(got.Data) != "png" || got.MIME != "image/png" || !strings.HasSuffix(got.URL, "/avatar.png") {
		t.Fatalf("avatar = %#v", got)
	}

	failures := []struct{ target, complaint string }{
		{"file:///tmp/avatar.png", "expected unsupported URL error"},
		{"http://[::1", "expected bad URL error"},
		{srv.URL + "/missing", "expected non-2xx error"},
		{srv.URL + "/too-large", "expected too large error"},
	}
	for _, failure := range failures {
		if _, err := fetchAvatarURL(t.Context(), failure.target); err == nil {
			t.Fatal(failure.complaint)
		}
	}

	// A zero-value adapter has no FetchAvatar hook and must use the HTTP path.
	viaAdapter, err := (GogAdapter{}).fetchAvatar(t.Context(), okURL)
	if err != nil || string(viaAdapter.Data) != "png" {
		t.Fatalf("default fetchAvatar = %#v err=%v", viaAdapter, err)
	}
}