feat(contacts): add dedupe preview (#555)
Adds a read-only contacts dedupe preview command with JSON/table output, generated command docs, README/spec docs, and changelog credit for the extracted idea from #116.

Co-authored-by: Rohan Patnaik <rohan-patnaik@users.noreply.github.com>
This commit is contained in:
parent
e9c496efd5
commit
62a7257aba
@ -9,6 +9,7 @@
|
||||
- Docs: add `docs format` and plain-text `docs write` formatting flags for fonts, colors, bold/italic/underline/strikethrough, alignment, and line spacing. (#479) — thanks @mmaghsoodnia.
|
||||
- Drive: add `--fields` to `drive ls` and `drive get` so callers can pass Drive API field masks for fields beyond the default JSON set. (#495) — thanks @karbassi.
|
||||
- Drive: add read-only `drive tree`, `drive du`, and `drive inventory` reports for auditing folder contents and sizes. (#116) — thanks @rohan-patnaik.
|
||||
- Contacts: add preview-only `contacts dedupe` to find likely duplicate contacts by email/phone, with opt-in name matching and JSON/table merge plans. (#116) — thanks @rohan-patnaik.
|
||||
- Sheets: add `sheets table` list/get/create/delete commands for Google Sheets structured tables. (#470) — thanks @Pedrohgv.
|
||||
- Agent safety: add baked safety-profile builds for fail-closed agent binaries, with `agent-safe`, `readonly`, and `full` profiles, filtered help/schema output, docs, and build tooling. (#366, #239) — thanks @drewburchfield.
|
||||
- Calendar: add `--with-meet` to `calendar update` for adding Google Meet conferencing to existing events. (#538) — thanks @alexisperumal.
|
||||
|
||||
@ -1324,6 +1324,10 @@ gog contacts get people/<resourceName> --json | \
|
||||
|
||||
gog contacts delete people/<resourceName>
|
||||
|
||||
# Dedupe preview (read-only)
|
||||
gog contacts dedupe
|
||||
gog contacts dedupe --match email,phone,name --json
|
||||
|
||||
# Workspace directory (requires Google Workspace)
|
||||
gog contacts directory list --max 50
|
||||
gog contacts directory search "Jane" --max 50
|
||||
|
||||
@ -190,6 +190,7 @@ Generated from `gog schema --json`.
|
||||
- [`gog config unset (rm,del,remove) <key>`](commands/gog-config-unset.md) - Unset a config value
|
||||
- [`gog contacts (contact) <command> [flags]`](commands/gog-contacts.md) - Google Contacts
|
||||
- [`gog contacts (contact) create (add,new) [flags]`](commands/gog-contacts-create.md) - Create a contact
|
||||
- [`gog contacts (contact) dedupe [flags]`](commands/gog-contacts-dedupe.md) - Find likely duplicate contacts (preview only)
|
||||
- [`gog contacts (contact) delete (rm,del,remove) <resourceName>`](commands/gog-contacts-delete.md) - Delete a contact
|
||||
- [`gog contacts (contact) directory <command>`](commands/gog-contacts-directory.md) - Directory contacts
|
||||
- [`gog contacts (contact) directory list [flags]`](commands/gog-contacts-directory-list.md) - List people from the Workspace directory
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
Every `gog` command has a generated docs page. The source of truth is the live CLI schema; run `make docs-commands` after changing command names, flags, help text, aliases, or arguments.
|
||||
|
||||
Generated pages: 469.
|
||||
Generated pages: 470.
|
||||
|
||||
## Top-level Commands
|
||||
|
||||
@ -233,6 +233,7 @@ Generated pages: 469.
|
||||
- [gog config unset](gog-config-unset.md) - Unset a config value
|
||||
- [gog contacts](gog-contacts.md) - Google Contacts
|
||||
- [gog contacts create](gog-contacts-create.md) - Create a contact
|
||||
- [gog contacts dedupe](gog-contacts-dedupe.md) - Find likely duplicate contacts (preview only)
|
||||
- [gog contacts delete](gog-contacts-delete.md) - Delete a contact
|
||||
- [gog contacts directory](gog-contacts-directory.md) - Directory contacts
|
||||
- [gog contacts directory list](gog-contacts-directory-list.md) - List people from the Workspace directory
|
||||
|
||||
45
docs/commands/gog-contacts-dedupe.md
Normal file
45
docs/commands/gog-contacts-dedupe.md
Normal file
@ -0,0 +1,45 @@
|
||||
# `gog contacts dedupe`
|
||||
|
||||
> Generated from `gog schema --json`. Do not edit this page by hand; run `make docs-commands`.
|
||||
|
||||
Find likely duplicate contacts (preview only)
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
gog contacts (contact) dedupe [flags]
|
||||
```
|
||||
|
||||
## Parent
|
||||
|
||||
- [gog contacts](gog-contacts.md)
|
||||
|
||||
## Flags
|
||||
|
||||
| Flag | Type | Default | Help |
|
||||
| --- | --- | --- | --- |
|
||||
| `--access-token` | `string` | | Use provided access token directly (bypasses stored refresh tokens; token expires in ~1h) |
|
||||
| `-a`<br>`--account`<br>`--acct` | `string` | | Account email for API commands (gmail/calendar/chat/classroom/drive/docs/slides/contacts/tasks/people/sheets/forms/appscript/ads) |
|
||||
| `--client` | `string` | | OAuth client name (selects stored credentials + token bucket) |
|
||||
| `--color` | `string` | auto | Color output: auto\|always\|never |
|
||||
| `--disable-commands` | `string` | | Comma-separated list of disabled commands; dot paths allowed |
|
||||
| `-n`<br>`--dry-run`<br>`--dryrun`<br>`--noop`<br>`--preview` | `bool` | | Do not make changes; print intended actions and exit successfully |
|
||||
| `--enable-commands` | `string` | | Comma-separated list of enabled commands; dot paths allowed (restricts CLI) |
|
||||
| `--fail-empty`<br>`--non-empty`<br>`--require-results` | `bool` | | Exit with code 3 if no duplicates |
|
||||
| `-y`<br>`--force`<br>`--assume-yes`<br>`--yes` | `bool` | | Skip confirmations for destructive commands |
|
||||
| `--gmail-no-send` | `bool` | false | Block Gmail send operations (agent safety) |
|
||||
| `-h`<br>`--help` | `kong.helpFlag` | | Show context-sensitive help. |
|
||||
| `-j`<br>`--json`<br>`--machine` | `bool` | false | Output JSON to stdout (best for scripting) |
|
||||
| `--match` | `string` | email,phone | Match fields: email,phone,name |
|
||||
| `--max`<br>`--limit` | `int64` | 0 | Max contacts to scan (0 = all) |
|
||||
| `--no-input`<br>`--non-interactive`<br>`--noninteractive` | `bool` | | Never prompt; fail instead (useful for CI) |
|
||||
| `-p`<br>`--plain`<br>`--tsv` | `bool` | false | Output stable, parseable text to stdout (TSV; no colors) |
|
||||
| `--results-only` | `bool` | | In JSON mode, emit only the primary result (drops envelope fields like nextPageToken) |
|
||||
| `--select`<br>`--pick`<br>`--project` | `string` | | In JSON mode, select comma-separated fields (best-effort; supports dot paths). Desire path: use --fields for most commands. |
|
||||
| `-v`<br>`--verbose` | `bool` | | Enable verbose logging |
|
||||
| `--version` | `kong.VersionFlag` | | Print version and exit |
|
||||
|
||||
## See Also
|
||||
|
||||
- [gog contacts](gog-contacts.md)
|
||||
- [Command index](README.md)
|
||||
@ -17,6 +17,7 @@ gog contacts (contact) <command> [flags]
|
||||
## Subcommands
|
||||
|
||||
- [gog contacts create](gog-contacts-create.md) - Create a contact
|
||||
- [gog contacts dedupe](gog-contacts-dedupe.md) - Find likely duplicate contacts (preview only)
|
||||
- [gog contacts delete](gog-contacts-delete.md) - Delete a contact
|
||||
- [gog contacts directory](gog-contacts-directory.md) - Directory contacts
|
||||
- [gog contacts export](gog-contacts-export.md) - Export contacts as vCard (.vcf)
|
||||
|
||||
@ -278,6 +278,7 @@ Flag aliases:
|
||||
- `gog classroom guardian-invitations get <studentId> <invitationId>`
|
||||
- `gog classroom guardian-invitations create <studentId> --email EMAIL`
|
||||
- `gog classroom profile [userId]`
|
||||
- `gog contacts dedupe [--match email,phone,name] [--max N]`
|
||||
- `gog gmail search <query> [--max N] [--page TOKEN]`
|
||||
- `gog gmail messages search <query> [--max N] [--page TOKEN] [--include-body] [--body-format text|html] [--full]`
|
||||
- `gog gmail autoreply <query> [--max N] [--subject S] [--body B|--body-file PATH|--body-html HTML] [--from addr] [--reply-to addr] [--label L] [--archive] [--mark-read] [--skip-bulk] [--allow-self]`
|
||||
|
||||
@ -17,6 +17,7 @@ type ContactsCmd struct {
|
||||
List ContactsListCmd `cmd:"" name:"list" aliases:"ls" help:"List contacts"`
|
||||
Get ContactsGetCmd `cmd:"" name:"get" aliases:"info,show" help:"Get a contact"`
|
||||
Export ContactsExportCmd `cmd:"" name:"export" help:"Export contacts as vCard (.vcf)"`
|
||||
Dedupe ContactsDedupeCmd `cmd:"" name:"dedupe" help:"Find likely duplicate contacts (preview only)"`
|
||||
Create ContactsCreateCmd `cmd:"" name:"create" aliases:"add,new" help:"Create a contact"`
|
||||
Update ContactsUpdateCmd `cmd:"" name:"update" aliases:"edit,set" help:"Update a contact"`
|
||||
Delete ContactsDeleteCmd `cmd:"" name:"delete" aliases:"rm,del,remove" help:"Delete a contact"`
|
||||
|
||||
461
internal/cmd/contacts_dedupe.go
Normal file
461
internal/cmd/contacts_dedupe.go
Normal file
@ -0,0 +1,461 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"google.golang.org/api/people/v1"
|
||||
|
||||
"github.com/steipete/gogcli/internal/outfmt"
|
||||
"github.com/steipete/gogcli/internal/ui"
|
||||
)
|
||||
|
||||
// ContactsDedupeCmd holds the flags for `contacts dedupe`, which reports
// likely duplicate contacts as a preview.
type ContactsDedupeCmd struct {
	// Match is the comma-separated list of fields used to link contacts
	// (email, phone, name); it is parsed by parseContactsDedupeMatch.
	Match string `name:"match" help:"Match fields: email,phone,name" default:"email,phone"`
	// Max caps how many contacts are scanned; 0 means scan everything.
	Max int64 `name:"max" aliases:"limit" help:"Max contacts to scan (0 = all)" default:"0"`
	// FailEmpty is handed to failEmptyExit when no duplicate groups are found.
	FailEmpty bool `name:"fail-empty" aliases:"non-empty,require-results" help:"Exit with code 3 if no duplicates"`
}
|
||||
|
||||
func (c *ContactsDedupeCmd) Run(ctx context.Context, flags *RootFlags) error {
|
||||
u := ui.FromContext(ctx)
|
||||
account, err := requireAccount(flags)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
match, err := parseContactsDedupeMatch(c.Match)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if c.Max < 0 {
|
||||
return usage("--max must be >= 0")
|
||||
}
|
||||
|
||||
svc, err := newPeopleContactsService(ctx, account)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
contacts, err := contactsDedupeList(ctx, svc, c.Max)
|
||||
if err != nil {
|
||||
return wrapPeopleAPIError(err)
|
||||
}
|
||||
|
||||
groups := buildContactsDedupeGroups(contacts, match)
|
||||
if err := writeContactsDedupe(ctx, u, groups, len(contacts)); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(groups) == 0 {
|
||||
return failEmptyExit(c.FailEmpty)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// contactsDedupeMatch records which contact fields are used to link
// contacts together as duplicates.
type contactsDedupeMatch struct {
	Email bool // link contacts that share a normalized email address
	Phone bool // link contacts that share a normalized phone number
	Name  bool // link contacts that share a normalized primary name
}
|
||||
|
||||
func parseContactsDedupeMatch(value string) (contactsDedupeMatch, error) {
|
||||
out := contactsDedupeMatch{}
|
||||
for _, part := range strings.Split(value, ",") {
|
||||
switch strings.TrimSpace(strings.ToLower(part)) {
|
||||
case "email":
|
||||
out.Email = true
|
||||
case "phone":
|
||||
out.Phone = true
|
||||
case "name":
|
||||
out.Name = true
|
||||
case "":
|
||||
continue
|
||||
default:
|
||||
return contactsDedupeMatch{}, usagef("invalid --match %q (use email, phone, name)", part)
|
||||
}
|
||||
}
|
||||
if !out.Email && !out.Phone && !out.Name {
|
||||
return contactsDedupeMatch{}, usage("invalid --match (no fields enabled)")
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func contactsDedupeList(ctx context.Context, svc *people.Service, maxResults int64) ([]*people.Person, error) {
|
||||
var out []*people.Person
|
||||
pageToken := ""
|
||||
for {
|
||||
pageSize := int64(500)
|
||||
if maxResults > 0 && maxResults-int64(len(out)) < pageSize {
|
||||
pageSize = maxResults - int64(len(out))
|
||||
}
|
||||
resp, err := svc.People.Connections.List(peopleMeResource).
|
||||
PersonFields(contactsReadMask).
|
||||
PageSize(pageSize).
|
||||
PageToken(pageToken).
|
||||
RequestSyncToken(false).
|
||||
Context(ctx).
|
||||
Do()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, p := range resp.Connections {
|
||||
if p != nil {
|
||||
out = append(out, p)
|
||||
}
|
||||
if maxResults > 0 && int64(len(out)) >= maxResults {
|
||||
return out, nil
|
||||
}
|
||||
}
|
||||
if resp.NextPageToken == "" {
|
||||
return out, nil
|
||||
}
|
||||
pageToken = resp.NextPageToken
|
||||
}
|
||||
}
|
||||
|
||||
type contactsDedupeGroup struct {
|
||||
Primary *people.Person
|
||||
Members []*people.Person
|
||||
MatchedOn []string
|
||||
Merged contactsDedupeSummary
|
||||
}
|
||||
|
||||
// contactsDedupeSummary is the compact JSON view of a contact (or of a
// proposed merge result). Empty fields are omitted from the encoded output.
type contactsDedupeSummary struct {
	Resource string   `json:"resource,omitempty"` // People API resource name
	Name     string   `json:"name,omitempty"`     // display name
	Emails   []string `json:"emails,omitempty"`   // de-duplicated email values
	Phones   []string `json:"phones,omitempty"`   // de-duplicated phone values
}
|
||||
|
||||
func buildContactsDedupeGroups(contacts []*people.Person, match contactsDedupeMatch) []contactsDedupeGroup {
|
||||
if len(contacts) == 0 {
|
||||
return nil
|
||||
}
|
||||
uf := newContactsDedupeUnionFind(len(contacts))
|
||||
keyOwners := map[string]int{}
|
||||
keyCounts := map[string]int{}
|
||||
groupKeys := map[int]map[string]bool{}
|
||||
for i, p := range contacts {
|
||||
for _, key := range contactsDedupeKeys(p, match) {
|
||||
keyCounts[key]++
|
||||
if prev, ok := keyOwners[key]; ok {
|
||||
uf.union(i, prev)
|
||||
} else {
|
||||
keyOwners[key] = i
|
||||
}
|
||||
}
|
||||
}
|
||||
for key, owner := range keyOwners {
|
||||
if keyCounts[key] < 2 {
|
||||
continue
|
||||
}
|
||||
root := uf.find(owner)
|
||||
if groupKeys[root] == nil {
|
||||
groupKeys[root] = map[string]bool{}
|
||||
}
|
||||
groupKeys[root][key] = true
|
||||
}
|
||||
|
||||
byRoot := map[int][]*people.Person{}
|
||||
for i, p := range contacts {
|
||||
byRoot[uf.find(i)] = append(byRoot[uf.find(i)], p)
|
||||
}
|
||||
|
||||
groups := make([]contactsDedupeGroup, 0)
|
||||
for root, members := range byRoot {
|
||||
if len(members) < 2 {
|
||||
continue
|
||||
}
|
||||
primary := chooseContactsDedupePrimary(members)
|
||||
matchedOn := sortedContactsDedupeKeys(groupKeys[root])
|
||||
groups = append(groups, contactsDedupeGroup{
|
||||
Primary: primary,
|
||||
Members: orderContactsDedupeMembers(primary, members),
|
||||
MatchedOn: matchedOn,
|
||||
Merged: summarizeContactsDedupeMerge(primary, members),
|
||||
})
|
||||
}
|
||||
sort.Slice(groups, func(i, j int) bool {
|
||||
return contactsDedupeResource(groups[i].Primary) < contactsDedupeResource(groups[j].Primary)
|
||||
})
|
||||
return groups
|
||||
}
|
||||
|
||||
func contactsDedupeKeys(p *people.Person, match contactsDedupeMatch) []string {
|
||||
var keys []string
|
||||
if p == nil {
|
||||
return keys
|
||||
}
|
||||
if match.Email {
|
||||
for _, email := range p.EmailAddresses {
|
||||
if email != nil {
|
||||
if v := normalizeContactEmail(email.Value); v != "" {
|
||||
keys = append(keys, "email:"+v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if match.Phone {
|
||||
for _, phone := range p.PhoneNumbers {
|
||||
if phone != nil {
|
||||
if v := normalizeContactPhone(phone.Value); v != "" {
|
||||
keys = append(keys, "phone:"+v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if match.Name {
|
||||
if v := normalizeContactName(primaryName(p)); v != "" {
|
||||
keys = append(keys, "name:"+v)
|
||||
}
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
func chooseContactsDedupePrimary(members []*people.Person) *people.Person {
|
||||
var best *people.Person
|
||||
bestScore := -1
|
||||
for _, p := range members {
|
||||
score := contactsDedupeScore(p)
|
||||
if best == nil || score > bestScore || score == bestScore && contactsDedupeResource(p) < contactsDedupeResource(best) {
|
||||
best = p
|
||||
bestScore = score
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
func contactsDedupeScore(p *people.Person) int {
|
||||
if p == nil {
|
||||
return 0
|
||||
}
|
||||
score := 0
|
||||
if primaryName(p) != "" {
|
||||
score += 2
|
||||
}
|
||||
score += len(p.EmailAddresses) * 2
|
||||
score += len(p.PhoneNumbers) * 2
|
||||
if len(p.Organizations) > 0 {
|
||||
score++
|
||||
}
|
||||
if len(p.Urls) > 0 {
|
||||
score++
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
func summarizeContactsDedupeMerge(primary *people.Person, members []*people.Person) contactsDedupeSummary {
|
||||
ordered := orderContactsDedupeMembers(primary, members)
|
||||
return contactsDedupeSummary{
|
||||
Resource: contactsDedupeResource(primary),
|
||||
Name: firstContactsDedupeName(primary, ordered),
|
||||
Emails: uniqueContactsDedupeEmails(ordered),
|
||||
Phones: uniqueContactsDedupePhones(ordered),
|
||||
}
|
||||
}
|
||||
|
||||
func writeContactsDedupe(ctx context.Context, u *ui.UI, groups []contactsDedupeGroup, scanned int) error {
|
||||
if outfmt.IsJSON(ctx) {
|
||||
payload := map[string]any{
|
||||
"scanned": scanned,
|
||||
"groups": contactsDedupeGroupsJSON(groups),
|
||||
}
|
||||
return outfmt.WriteJSON(ctx, os.Stdout, payload)
|
||||
}
|
||||
if len(groups) == 0 {
|
||||
if u != nil {
|
||||
u.Err().Printf("No duplicate contacts found (scanned %d)", scanned)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
w, flush := tableWriter(ctx)
|
||||
defer flush()
|
||||
fmt.Fprintln(w, "GROUP\tACTION\tRESOURCE\tNAME\tEMAIL\tPHONE\tMATCHED_ON")
|
||||
for i, group := range groups {
|
||||
matchedOn := strings.Join(group.MatchedOn, ",")
|
||||
for _, member := range group.Members {
|
||||
action := "merge"
|
||||
if contactsDedupeResource(member) == contactsDedupeResource(group.Primary) {
|
||||
action = "keep"
|
||||
}
|
||||
fmt.Fprintf(w, "%d\t%s\t%s\t%s\t%s\t%s\t%s\n",
|
||||
i+1,
|
||||
action,
|
||||
sanitizeTab(contactsDedupeResource(member)),
|
||||
sanitizeTab(primaryName(member)),
|
||||
sanitizeTab(primaryEmail(member)),
|
||||
sanitizeTab(primaryPhone(member)),
|
||||
sanitizeTab(matchedOn),
|
||||
)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func contactsDedupeGroupsJSON(groups []contactsDedupeGroup) []map[string]any {
|
||||
out := make([]map[string]any, 0, len(groups))
|
||||
for _, group := range groups {
|
||||
members := make([]contactsDedupeSummary, 0, len(group.Members))
|
||||
for _, member := range group.Members {
|
||||
members = append(members, summarizeContactsDedupeContact(member))
|
||||
}
|
||||
out = append(out, map[string]any{
|
||||
"primary": summarizeContactsDedupeContact(group.Primary),
|
||||
"merged": group.Merged,
|
||||
"matched_on": group.MatchedOn,
|
||||
"members": members,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func summarizeContactsDedupeContact(p *people.Person) contactsDedupeSummary {
|
||||
if p == nil {
|
||||
return contactsDedupeSummary{}
|
||||
}
|
||||
return contactsDedupeSummary{
|
||||
Resource: p.ResourceName,
|
||||
Name: primaryName(p),
|
||||
Emails: uniqueContactsDedupeEmails([]*people.Person{p}),
|
||||
Phones: uniqueContactsDedupePhones([]*people.Person{p}),
|
||||
}
|
||||
}
|
||||
|
||||
func orderContactsDedupeMembers(primary *people.Person, members []*people.Person) []*people.Person {
|
||||
out := make([]*people.Person, 0, len(members))
|
||||
if primary != nil {
|
||||
out = append(out, primary)
|
||||
}
|
||||
for _, member := range members {
|
||||
if member == nil || contactsDedupeResource(member) == contactsDedupeResource(primary) {
|
||||
continue
|
||||
}
|
||||
out = append(out, member)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func firstContactsDedupeName(primary *people.Person, members []*people.Person) string {
|
||||
if name := primaryName(primary); name != "" {
|
||||
return name
|
||||
}
|
||||
for _, member := range members {
|
||||
if name := primaryName(member); name != "" {
|
||||
return name
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func uniqueContactsDedupeEmails(members []*people.Person) []string {
|
||||
seen := map[string]bool{}
|
||||
var out []string
|
||||
for _, p := range members {
|
||||
if p == nil {
|
||||
continue
|
||||
}
|
||||
for _, email := range p.EmailAddresses {
|
||||
if email == nil {
|
||||
continue
|
||||
}
|
||||
if key := normalizeContactEmail(email.Value); key != "" && !seen[key] {
|
||||
seen[key] = true
|
||||
out = append(out, strings.TrimSpace(email.Value))
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func uniqueContactsDedupePhones(members []*people.Person) []string {
|
||||
seen := map[string]bool{}
|
||||
var out []string
|
||||
for _, p := range members {
|
||||
if p == nil {
|
||||
continue
|
||||
}
|
||||
for _, phone := range p.PhoneNumbers {
|
||||
if phone == nil {
|
||||
continue
|
||||
}
|
||||
if key := normalizeContactPhone(phone.Value); key != "" && !seen[key] {
|
||||
seen[key] = true
|
||||
out = append(out, strings.TrimSpace(phone.Value))
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// normalizeContactEmail returns value with surrounding whitespace removed
// and all letters lower-cased, so addresses compare case-insensitively.
func normalizeContactEmail(value string) string {
	trimmed := strings.TrimSpace(value)
	return strings.ToLower(trimmed)
}
|
||||
|
||||
// normalizeContactPhone keeps only the ASCII digits 0-9 of value, dropping
// spaces, punctuation, a leading '+', and every other character.
func normalizeContactPhone(value string) string {
	digitsOnly := func(r rune) rune {
		if r < '0' || r > '9' {
			return -1 // a negative rune tells strings.Map to drop the character
		}
		return r
	}
	return strings.Map(digitsOnly, value)
}
|
||||
|
||||
// normalizeContactName lower-cases value, trims it, and collapses every run
// of whitespace into a single space.
func normalizeContactName(value string) string {
	lowered := strings.ToLower(strings.TrimSpace(value))
	words := strings.Fields(lowered)
	return strings.Join(words, " ")
}
|
||||
|
||||
func contactsDedupeResource(p *people.Person) string {
|
||||
if p == nil {
|
||||
return ""
|
||||
}
|
||||
return p.ResourceName
|
||||
}
|
||||
|
||||
// sortedContactsDedupeKeys returns every key of the set in ascending order,
// regardless of the stored bool. The result is non-nil even for a nil or
// empty map.
func sortedContactsDedupeKeys(keys map[string]bool) []string {
	sorted := make([]string, len(keys))
	next := 0
	for key := range keys {
		sorted[next] = key
		next++
	}
	sort.Strings(sorted)
	return sorted
}
|
||||
|
||||
// contactsDedupeUnionFind is a disjoint-set forest over the integers
// 0..n-1, used to cluster contacts by index.
type contactsDedupeUnionFind struct {
	parent []int // parent[i] == i marks i as the root of its set
	rank   []int // per-root rank used to decide which root absorbs the other
}

// newContactsDedupeUnionFind creates n singleton sets, one per index.
func newContactsDedupeUnionFind(n int) *contactsDedupeUnionFind {
	uf := &contactsDedupeUnionFind{
		parent: make([]int, n),
		rank:   make([]int, n),
	}
	for i := 0; i < n; i++ {
		uf.parent[i] = i
	}
	return uf
}

// find returns the root of the set containing x and re-points every node on
// the walked path directly at that root.
func (u *contactsDedupeUnionFind) find(x int) int {
	root := x
	for u.parent[root] != root {
		root = u.parent[root]
	}
	// Second pass: compress the path from x up to the root.
	for u.parent[x] != x {
		next := u.parent[x]
		u.parent[x] = root
		x = next
	}
	return root
}

// union merges the sets containing a and b. The root with the lower rank is
// attached under the other; on equal rank, b's root is attached under a's
// root, whose rank then grows by one.
func (u *contactsDedupeUnionFind) union(a int, b int) {
	rootA, rootB := u.find(a), u.find(b)
	if rootA == rootB {
		return
	}
	switch {
	case u.rank[rootA] < u.rank[rootB]:
		u.parent[rootA] = rootB
	case u.rank[rootA] > u.rank[rootB]:
		u.parent[rootB] = rootA
	default:
		u.parent[rootB] = rootA
		u.rank[rootA]++
	}
}
|
||||
127
internal/cmd/contacts_dedupe_test.go
Normal file
127
internal/cmd/contacts_dedupe_test.go
Normal file
@ -0,0 +1,127 @@
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"google.golang.org/api/people/v1"
|
||||
)
|
||||
|
||||
func TestParseContactsDedupeMatch(t *testing.T) {
|
||||
got, err := parseContactsDedupeMatch("email, phone")
|
||||
if err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if !got.Email || !got.Phone || got.Name {
|
||||
t.Fatalf("unexpected match: %#v", got)
|
||||
}
|
||||
if _, err := parseContactsDedupeMatch("email,bogus"); err == nil {
|
||||
t.Fatalf("expected invalid field error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildContactsDedupeGroupsTransitive(t *testing.T) {
|
||||
contacts := []*people.Person{
|
||||
testDedupePerson("people/1", "Ada One", []string{"ada@example.com"}, nil),
|
||||
testDedupePerson("people/2", "Ada Two", []string{"ADA@example.com"}, []string{"+1 (555) 0100"}),
|
||||
testDedupePerson("people/3", "Ada Three", nil, []string{"15550100"}),
|
||||
testDedupePerson("people/4", "Grace", []string{"grace@example.com"}, nil),
|
||||
}
|
||||
groups := buildContactsDedupeGroups(contacts, contactsDedupeMatch{Email: true, Phone: true})
|
||||
if len(groups) != 1 {
|
||||
t.Fatalf("groups = %d, want 1: %#v", len(groups), groups)
|
||||
}
|
||||
if got := len(groups[0].Members); got != 3 {
|
||||
t.Fatalf("members = %d, want 3", got)
|
||||
}
|
||||
if !reflect.DeepEqual(groups[0].MatchedOn, []string{"email:ada@example.com", "phone:15550100"}) {
|
||||
t.Fatalf("matched_on = %#v", groups[0].MatchedOn)
|
||||
}
|
||||
if groups[0].Primary.ResourceName != "people/2" {
|
||||
t.Fatalf("primary = %s, want people/2", groups[0].Primary.ResourceName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildContactsDedupeGroupsNameOptIn(t *testing.T) {
|
||||
contacts := []*people.Person{
|
||||
testDedupePerson("people/1", "Ada Lovelace", nil, nil),
|
||||
testDedupePerson("people/2", " ada lovelace ", nil, nil),
|
||||
}
|
||||
if groups := buildContactsDedupeGroups(contacts, contactsDedupeMatch{Email: true, Phone: true}); len(groups) != 0 {
|
||||
t.Fatalf("default match should ignore name-only duplicates: %#v", groups)
|
||||
}
|
||||
if groups := buildContactsDedupeGroups(contacts, contactsDedupeMatch{Name: true}); len(groups) != 1 {
|
||||
t.Fatalf("name match should find one group, got %d", len(groups))
|
||||
}
|
||||
}
|
||||
|
||||
func TestContactsDedupeExecuteJSON(t *testing.T) {
|
||||
svc, closeSrv := newPeopleService(t, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet || r.URL.Path != "/v1/people/me/connections" {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
if got := r.URL.Query().Get("personFields"); !strings.Contains(got, "emailAddresses") {
|
||||
t.Fatalf("missing personFields: %q", got)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"connections": []map[string]any{
|
||||
{
|
||||
"resourceName": "people/1",
|
||||
"names": []map[string]any{{"displayName": "Ada One"}},
|
||||
"emailAddresses": []map[string]any{{"value": "ada@example.com"}},
|
||||
},
|
||||
{
|
||||
"resourceName": "people/2",
|
||||
"names": []map[string]any{{"displayName": "Ada Two"}},
|
||||
"emailAddresses": []map[string]any{{"value": "ADA@example.com"}},
|
||||
},
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer closeSrv()
|
||||
stubPeopleServices(t, svc)
|
||||
|
||||
out := captureStdout(t, func() {
|
||||
_ = captureStderr(t, func() {
|
||||
if err := Execute([]string{"--json", "--account", "a@example.com", "contacts", "dedupe"}); err != nil {
|
||||
t.Fatalf("Execute: %v", err)
|
||||
}
|
||||
})
|
||||
})
|
||||
var parsed struct {
|
||||
Scanned int `json:"scanned"`
|
||||
Groups []struct {
|
||||
MatchedOn []string `json:"matched_on"`
|
||||
Members []struct {
|
||||
Resource string `json:"resource"`
|
||||
} `json:"members"`
|
||||
} `json:"groups"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(out), &parsed); err != nil {
|
||||
t.Fatalf("json parse: %v\n%s", err, out)
|
||||
}
|
||||
if parsed.Scanned != 2 || len(parsed.Groups) != 1 || len(parsed.Groups[0].Members) != 2 {
|
||||
t.Fatalf("unexpected payload: %#v", parsed)
|
||||
}
|
||||
if !reflect.DeepEqual(parsed.Groups[0].MatchedOn, []string{"email:ada@example.com"}) {
|
||||
t.Fatalf("matched_on = %#v", parsed.Groups[0].MatchedOn)
|
||||
}
|
||||
}
|
||||
|
||||
func testDedupePerson(resource, name string, emails, phones []string) *people.Person {
|
||||
p := &people.Person{ResourceName: resource}
|
||||
if name != "" {
|
||||
p.Names = []*people.Name{{DisplayName: name}}
|
||||
}
|
||||
for _, email := range emails {
|
||||
p.EmailAddresses = append(p.EmailAddresses, &people.EmailAddress{Value: email})
|
||||
}
|
||||
for _, phone := range phones {
|
||||
p.PhoneNumbers = append(p.PhoneNumbers, &people.PhoneNumber{Value: phone})
|
||||
}
|
||||
return p
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user