diff --git a/internal/discorddesktop/dm_names.go b/internal/discorddesktop/dm_names.go
new file mode 100644
index 0000000..e879f14
--- /dev/null
+++ b/internal/discorddesktop/dm_names.go
@@ -0,0 +1,250 @@
+package discorddesktop
+
+import (
+	"encoding/json"
+	"sort"
+	"strings"
+)
+
+// userLabel is a display name cached for a Discord user together with the
+// priority of the field it was read from (see userObjectLabel).
+type userLabel struct {
+	Name     string
+	Priority int
+}
+
+// collectUserLabel records the label of the user object in raw, keyed by its
+// id. Objects whose id fails looksSnowflake or that carry no user-like key
+// are ignored, and an existing non-empty label is only replaced by one with a
+// higher priority.
+func collectUserLabel(snap snapshot, raw map[string]any) {
+	id := stringField(raw, "id")
+	if !looksSnowflake(id) || !looksUserObject(raw) {
+		return
+	}
+	name, priority := userObjectLabel(raw)
+	if name == "" {
+		return
+	}
+	if existing, ok := snap.userLabels[id]; !ok || priority > existing.Priority || existing.Name == "" {
+		snap.userLabels[id] = userLabel{Name: name, Priority: priority}
+	}
+}
+
+// looksUserObject reports whether raw has at least one of the keys used here
+// to recognise a user payload.
+func looksUserObject(raw map[string]any) bool {
+	for _, key := range []string{"username", "global_name", "display_name", "discriminator", "avatar", "bot", "public_flags"} {
+		if _, ok := raw[key]; ok {
+			return true
+		}
+	}
+	return false
+}
+
+// userObjectLabel returns the preferred display name in raw and its priority:
+// global_name (3), then display_name (2), then username (1). It returns
+// ("", 0) when none of them is set.
+func userObjectLabel(raw map[string]any) (string, int) {
+	if name := stringField(raw, "global_name"); name != "" {
+		return name, 3
+	}
+	if name := stringField(raw, "display_name"); name != "" {
+		return name, 2
+	}
+	if name := stringField(raw, "username"); name != "" {
+		return name, 1
+	}
+	return "", 0
+}
+
+// inferDirectMessageNames applies cached user labels to message authors and
+// then renames DM channels that still have a fallback name after the authors
+// seen in their messages. The author found in the most DM channels is taken
+// to be the local account and is skipped when a channel has other authors.
+func inferDirectMessageNames(snap snapshot) {
+	authorChannels := map[string]map[string]struct{}{}
+	channelAuthors := map[string]map[string]int{}
+	for id, msg := range snap.messages {
+		if label, ok := snap.userLabels[msg.Record.AuthorID]; ok && shouldUseUserLabel(msg.Record.AuthorName, label) {
+			msg.Record.AuthorName = label.Name
+			msg.Record.RawJSON = withRawAuthorLabel(msg.Record.RawJSON, msg.Record.AuthorID, label)
+			msg.PayloadJSON = withRawAuthorLabel(msg.PayloadJSON, msg.Record.AuthorID, label)
+			snap.messages[id] = msg
+		}
+		if msg.Record.GuildID != DirectMessageGuildID || msg.Record.AuthorID == "" {
+			continue
+		}
+		if authorChannels[msg.Record.AuthorID] == nil {
+			authorChannels[msg.Record.AuthorID] = map[string]struct{}{}
+		}
+		authorChannels[msg.Record.AuthorID][msg.Record.ChannelID] = struct{}{}
+		if channelAuthors[msg.Record.ChannelID] == nil {
+			channelAuthors[msg.Record.ChannelID] = map[string]int{}
+		}
+		channelAuthors[msg.Record.ChannelID][msg.Record.AuthorID]++
+	}
+
+	selfID := mostRepeatedDirectMessageAuthor(authorChannels)
+	for id, channel := range snap.channels {
+		if channel.GuildID != DirectMessageGuildID || !isFallbackChannelName(channel.Name, id) {
+			continue
+		}
+		name := directMessageChannelName(channelAuthors[id], snap.userLabels, selfID)
+		if name == "" {
+			continue
+		}
+		channel.Name = name
+		channel.RawJSON = withRawChannelName(channel.RawJSON, id, channel.GuildID, name, channel.Kind)
+		snap.channels[id] = channel
+	}
+}
+
+// shouldUseUserLabel reports whether label should replace the current author
+// name: always when current is empty, otherwise only for labels with
+// priority 2 or higher.
+func shouldUseUserLabel(current string, label userLabel) bool {
+	if label.Name == "" || current == label.Name {
+		return false
+	}
+	return current == "" || label.Priority >= 2
+}
+
+// mostRepeatedDirectMessageAuthor returns the author seen in the most DM
+// channels, or "" when no author appears in more than one channel. Ties go
+// to the smallest ID so the result does not depend on map iteration order.
+func mostRepeatedDirectMessageAuthor(authorChannels map[string]map[string]struct{}) string {
+	selfID := ""
+	selfChannels := 1
+	for authorID, channels := range authorChannels {
+		n := len(channels)
+		if n > selfChannels || (n == selfChannels && selfID != "" && authorID < selfID) {
+			selfID = authorID
+			selfChannels = n
+		}
+	}
+	return selfID
+}
+
+// directMessageChannelName derives a name for one DM channel from the labels
+// of its authors. selfID is skipped when the channel has other authors. With
+// more than two authors it returns the sorted, comma-separated labels;
+// otherwise it returns the label of the author with the most messages, then
+// the higher label priority, then the smaller ID (so the choice does not
+// depend on map iteration order). It returns "" when no author has a label.
+func directMessageChannelName(authorCounts map[string]int, labels map[string]userLabel, selfID string) string {
+	var candidates []string
+	bestID := ""
+	bestCount := -1
+	for authorID, count := range authorCounts {
+		label, ok := labels[authorID]
+		if !ok || label.Name == "" {
+			continue
+		}
+		if authorID == selfID && len(authorCounts) > 1 {
+			continue
+		}
+		if len(authorCounts) > 2 {
+			candidates = append(candidates, label.Name)
+			continue
+		}
+		best := labels[bestID]
+		if count > bestCount ||
+			(count == bestCount && label.Priority > best.Priority) ||
+			(count == bestCount && label.Priority == best.Priority && authorID < bestID) {
+			bestID = authorID
+			bestCount = count
+		}
+	}
+	if len(candidates) > 0 {
+		sort.Strings(candidates)
+		return strings.Join(candidates, ", ")
+	}
+	if bestID == "" {
+		return ""
+	}
+	return labels[bestID].Name
+}
+
+// isFallbackChannelName reports whether name is blank or one of the
+// placeholders "channel-"+shortID(id) and "dm-"+shortID(id).
+func isFallbackChannelName(name, id string) bool {
+	name = strings.TrimSpace(name)
+	return name == "" || name == "channel-"+shortID(id) || name == "dm-"+shortID(id)
+}
+
+// withRawChannelName returns rawJSON with the id, guild_id, name, kind and
+// source fields overwritten. If the result cannot be encoded, rawJSON is
+// returned unchanged.
+func withRawChannelName(rawJSON, id, guildID, name, kind string) string {
+	raw := map[string]any{}
+	if rawJSON != "" {
+		// Best effort: a decode error is ignored and the fields below are
+		// still written.
+		_ = json.Unmarshal([]byte(rawJSON), &raw)
+	}
+	if raw == nil {
+		// A JSON null decodes without error but resets the map to nil,
+		// and writing to a nil map panics.
+		raw = map[string]any{}
+	}
+	raw["id"] = id
+	raw["guild_id"] = guildID
+	raw["name"] = name
+	raw["kind"] = kind
+	raw["source"] = "discord_desktop"
+	body, err := json.Marshal(raw)
+	if err != nil {
+		return rawJSON
+	}
+	return string(body)
+}
+
+// withRawAuthorLabel returns rawJSON with its author object updated to carry
+// authorID and label.Name (as global_name for priority >= 2, otherwise as
+// username). rawJSON is returned unchanged when any input is empty or when
+// it does not decode to a JSON object.
+func withRawAuthorLabel(rawJSON, authorID string, label userLabel) string {
+	if rawJSON == "" || authorID == "" || label.Name == "" {
+		return rawJSON
+	}
+	raw := map[string]any{}
+	if err := json.Unmarshal([]byte(rawJSON), &raw); err != nil || raw == nil {
+		// raw == nil: a JSON null decodes without error but leaves a nil
+		// map, and writing to a nil map panics.
+		return rawJSON
+	}
+	author, _ := raw["author"].(map[string]any)
+	if author == nil {
+		author = map[string]any{}
+	}
+	author["id"] = authorID
+	if label.Priority >= 2 {
+		author["global_name"] = label.Name
+	} else {
+		author["username"] = label.Name
+	}
+	raw["author"] = author
+	body, err := json.Marshal(raw)
+	if err != nil {
+		return rawJSON
+	}
+	return string(body)
+}
+
+// sanitizedRawAuthor returns a copy of raw["author"] reduced to the given
+// authorID (when non-empty) and its non-empty username, global_name and
+// display_name fields.
+func sanitizedRawAuthor(raw map[string]any, authorID string) map[string]any {
+	author, _ := raw["author"].(map[string]any)
+	out := map[string]any{}
+	if authorID != "" {
+		out["id"] = authorID
+	}
+	for _, key := range []string{"username", "global_name", "display_name"} {
+		if value := stringField(author, key); value != "" {
+			out[key] = value
+		}
+	}
+	return out
+}
diff --git a/internal/discorddesktop/import.go b/internal/discorddesktop/import.go
index 6a31374..606ecd4 100644
--- a/internal/discorddesktop/import.go
+++ b/internal/discorddesktop/import.go
@@ -58,10 +58,11 @@ type Stats struct {
 }
 
 type snapshot struct {
-	guilds   map[string]store.GuildRecord
-	channels map[string]store.ChannelRecord
-	messages map[string]store.MessageMutation
-	routes   map[string]string
+	guilds     map[string]store.GuildRecord
+	channels   map[string]store.ChannelRecord
+	messages   map[string]store.MessageMutation
+	routes     map[string]string
+	userLabels map[string]userLabel
 }
 
 func DefaultPath() string {
@@ -115,10 +116,11 @@ func scan(ctx context.Context, opts Options) (Stats, snapshot, error) {
 	}
 	stats := Stats{Path: root, StartedAt: now().UTC()}
 	snap := snapshot{
-		guilds:   map[string]store.GuildRecord{},
-		channels: map[string]store.ChannelRecord{},
-		messages: map[string]store.MessageMutation{},
-		routes:   map[string]string{},
+		guilds:     map[string]store.GuildRecord{},
+		channels:   map[string]store.ChannelRecord{},
+		messages:   map[string]store.MessageMutation{},
+		routes:     map[string]string{},
+		userLabels: map[string]userLabel{},
 	}
 	if err := filepath.WalkDir(root, func(path string, entry fs.DirEntry, err error) error {
 		if err != nil {
@@ -168,6 +170,8 @@ func scan(ctx context.Context, opts Options) (Stats, snapshot, error) {
 		return stats, snap, err
 	}
 	reconcileMessages(snap)
+	inferDirectMessageNames(snap)
+	reconcileMessages(snap)
 	skippedChannels := map[string]struct{}{}
 	for id, msg := range snap.messages {
 		guildID := msg.Record.GuildID
@@ -247,6 +251,7 @@ func writeSnapshot(ctx context.Context, st *store.Store, snap snapshot) error {
 func collectValue(snap snapshot, value any, fallbackTime time.Time) {
 	switch typed := value.(type) {
 	case map[string]any:
+		collectUserLabel(snap, typed)
 		if channel, ok := parseChannel(typed); ok {
 			snap.channels[channel.ID] = channel
 			if channel.GuildID == DirectMessageGuildID {
@@ -758,7 +763,7 @@ func channelRawJSON(raw map[string]any, id, guildID, name, kind string) string {
 }
 
 func messageRawJSON(raw map[string]any, id, guildID, channelID, authorID string) string {
-	body, _ := json.Marshal(map[string]any{
+	payload := map[string]any{
 		"id":                 id,
 		"guild_id":           guildID,
 		"channel_id":         channelID,
@@ -771,7 +776,11 @@ func messageRawJSON(raw map[string]any, id, guildID, channelID, authorID string)
 		"attachment_count":   lenArray(raw["attachments"]),
 		"mention_count":      lenArray(raw["mentions"]),
 		"desktop_cache_note": "raw desktop cache payload intentionally not stored",
-	})
+	}
+	if author := sanitizedRawAuthor(raw, authorID); len(author) > 0 {
+		payload["author"] = author
+	}
+	body, _ := json.Marshal(payload)
 	return string(body)
 }
 
diff --git a/internal/discorddesktop/import_test.go b/internal/discorddesktop/import_test.go
index b5e5c56..739a23b 100644
--- a/internal/discorddesktop/import_test.go
+++ b/internal/discorddesktop/import_test.go
@@ -195,6 +195,53 @@ func TestImportClassifiesMessagesFromCachedChannelRoutes(t *testing.T) {
 	require.Equal(t, [][]string{{"Discord Desktop Guild 999999999999999998"}}, guildRows)
 }
 
+func TestImportInfersDirectMessageNamesFromCachedUsers(t *testing.T) {
+	ctx := context.Background()
+	dir := t.TempDir()
+	cachePath := filepath.Join(dir, "Cache", "Cache_Data")
+	require.NoError(t, os.MkdirAll(cachePath, 0o755))
+	require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`https://discord.com/channels/@me/111111111111111119
+[
+{"id":"333333333333333341","channel_id":"111111111111111119","content":"self first","timestamp":"2026-04-23T18:20:43Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
+{"id":"333333333333333342","channel_id":"111111111111111119","content":"self second","timestamp":"2026-04-23T18:20:44Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
+{"id":"333333333333333343","channel_id":"111111111111111119","content":"counterparty","timestamp":"2026-04-23T18:20:45Z","author":{"id":"222222222222222230","username":"vincentkoc"}}
+]
+{"user":{"id":"222222222222222230","username":"vincentkoc","global_name":"Vincent K"}}
+https://discord.com/channels/@me/111111111111111120
+{"id":"333333333333333344","channel_id":"111111111111111120","content":"another dm","timestamp":"2026-04-23T18:20:46Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}}
+{"id":"333333333333333345","channel_id":"111111111111111120","content":"alice reply","timestamp":"2026-04-23T18:20:47Z","author":{"id":"222222222222222231","username":"alice","global_name":"Alice"}}
+`), 0o600))
+
+	dbPath := filepath.Join(dir, "discrawl.db")
+	st, err := store.Open(ctx, dbPath)
+	require.NoError(t, err)
+	defer func() { _ = st.Close() }()
+
+	stats, err := Import(ctx, st, Options{Path: dir})
+	require.NoError(t, err)
+	require.Equal(t, 5, stats.Messages)
+	require.Equal(t, 2, stats.DMChannels)
+
+	channels, err := st.Channels(ctx, DirectMessageGuildID)
+	require.NoError(t, err)
+	namesByID := map[string]string{}
+	for _, channel := range channels {
+		namesByID[channel.ID] = channel.Name
+	}
+	require.Equal(t, "Vincent K", namesByID["111111111111111119"])
+	require.Equal(t, "Alice", namesByID["111111111111111120"])
+
+	rows, err := st.ListMessages(ctx, store.MessageListOptions{
+		GuildIDs: []string{DirectMessageGuildID},
+		Channel:  "Vincent",
+		Last:     1,
+	})
+	require.NoError(t, err)
+	require.Len(t, rows, 1)
+	require.Equal(t, "Vincent K", rows[0].ChannelName)
+	require.Equal(t, "Vincent K", rows[0].AuthorName)
+}
+
 func TestImportDropsPreviousUnknownWiretapRows(t *testing.T) {
 	ctx := context.Background()
 	dir := t.TempDir()