From fb969672e0534b495769783c06c973c471bc7163 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Fri, 8 May 2026 08:37:27 +0100 Subject: [PATCH] test: cover cli and archive helper edges --- docs/README.md | 6 +- docs/commands/tui.md | 47 ++++ internal/cli/cli_test.go | 244 ++++++++++++++++++ .../discorddesktop/import_helpers_test.go | 106 ++++++++ .../import_value_helpers_test.go | 85 ++++++ internal/embed/provider_test.go | 68 +++++ internal/store/store_test.go | 128 +++++++++ internal/syncer/message_sync_helpers_test.go | 214 +++++++++++++++ 8 files changed, 895 insertions(+), 3 deletions(-) create mode 100644 docs/commands/tui.md create mode 100644 internal/syncer/message_sync_helpers_test.go diff --git a/docs/README.md b/docs/README.md index d860c7c..de725fc 100644 --- a/docs/README.md +++ b/docs/README.md @@ -31,9 +31,9 @@ discrawl search "panic: nil pointer" discrawl tail ``` -`discrawl tui` uses the shared crawlkit terminal explorer: channel/person/thread -groups on the left, message rows in the middle, and readable message/thread -detail on the right. +[`discrawl tui`](commands/tui.html) uses the shared crawlkit terminal explorer: +channel/person/thread groups on the left, message rows in the middle, and +readable message/thread detail on the right. ## Sections diff --git a/docs/commands/tui.md b/docs/commands/tui.md new file mode 100644 index 0000000..59eabf7 --- /dev/null +++ b/docs/commands/tui.md @@ -0,0 +1,47 @@ +# `tui` + +Opens the local terminal archive browser for stored messages. 
+ +## Usage + +```bash +discrawl tui +discrawl tui --guild 123456789012345678 --channel general +discrawl tui --guilds 123,456 --author 1456464433768300635 +discrawl tui --dm +discrawl --json tui --limit 50 +``` + +## What it shows + +The browser uses the shared crawlkit explorer: + +- left pane: channel, person, or thread groups +- middle pane: newest matching message rows +- right pane: selected message detail, attachments, replies, and thread context +- footer: local DB or remote Git snapshot source + +Mouse selection, right-click actions, sortable headers, refresh, and chat layout match the other crawlkit-backed archive tools. + +## Flags + +- `--guild <id>` / `--guilds <ids>` - restrict the guild scope +- `--dm` - browse local direct messages under the synthetic `@me` guild +- `--channel <channel>` - restrict to one channel or DM conversation +- `--author <author>` - restrict to one author +- `--limit <n>` - newest rows to load (default 200) +- `--include-empty` - include rows with no displayable/searchable content +- `--json` - print crawlkit browser rows as JSON instead of opening the TUI + +## Notes + +- `tui` is read-only. 
+- without `--guild`, `--guilds`, or `--dm`, it uses `default_guild_id` when configured; otherwise it can browse all stored guild rows +- `--dm` only shows messages imported from the local Discord Desktop cache by [`wiretap`](wiretap.html) +- `--json` is useful for launchers and agents that want the same row shape without an interactive terminal + +## See also + +- [`messages`](messages.html) +- [`dms`](dms.html) +- [`wiretap`](wiretap.html) diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 5d1e07e..b5b77dd 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "encoding/json" + "errors" + "io" "log/slog" "net/http" "net/http/httptest" @@ -20,6 +22,8 @@ import ( "github.com/openclaw/discrawl/internal/config" discordclient "github.com/openclaw/discrawl/internal/discord" + "github.com/openclaw/discrawl/internal/discorddesktop" + "github.com/openclaw/discrawl/internal/report" "github.com/openclaw/discrawl/internal/share" "github.com/openclaw/discrawl/internal/store" "github.com/openclaw/discrawl/internal/syncer" @@ -38,6 +42,192 @@ func TestHelpAndVersion(t *testing.T) { err := Run(context.Background(), []string{"bogus"}, &out, &bytes.Buffer{}) require.Equal(t, 2, ExitCode(err)) + require.Equal(t, 1, ExitCode(context.Canceled)) + require.Equal(t, 7, ExitCode(&cliError{code: 7, err: errors.New("custom")})) +} + +func TestCommandValidationEdges(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + cfgPath := filepath.Join(dir, "config.toml") + dbPath := filepath.Join(dir, "discrawl.db") + cfg := config.Default() + cfg.DBPath = dbPath + cfg.Discord.TokenSource = "none" + require.NoError(t, config.Write(cfgPath, cfg)) + s, err := store.Open(ctx, dbPath) + require.NoError(t, err) + require.NoError(t, s.Close()) + + cases := [][]string{ + {"--config", cfgPath, "--bogus"}, + {"--config", cfgPath, "search"}, + {"--config", cfgPath, "search", "--mode", "bogus", "term"}, + {"--config", 
cfgPath, "messages"}, + {"--config", cfgPath, "messages", "--hours", "-1", "--channel", "general"}, + {"--config", cfgPath, "messages", "--hours", "1", "--days", "1", "--channel", "general"}, + {"--config", cfgPath, "messages", "--all", "--last", "1", "--channel", "general"}, + {"--config", cfgPath, "messages", "--dm", "--sync", "--channel", "alice"}, + {"--config", cfgPath, "dms", "--hours", "-1"}, + {"--config", cfgPath, "dms", "--limit", "1", "--last", "1", "--with", "alice"}, + {"--config", cfgPath, "mentions"}, + {"--config", cfgPath, "mentions", "--days", "-1", "--target", "u1"}, + {"--config", cfgPath, "mentions", "--type", "channel", "--target", "u1"}, + {"--config", cfgPath, "digest", "--since", "-1d"}, + {"--config", cfgPath, "analytics", "wat"}, + {"--config", cfgPath, "analytics", "quiet", "extra"}, + {"--config", cfgPath, "analytics", "trends", "--weeks", "-1"}, + {"--config", cfgPath, "channels"}, + {"--config", cfgPath, "channels", "wat"}, + {"--config", cfgPath, "channels", "show"}, + {"--config", cfgPath, "status", "extra"}, + {"--config", cfgPath, "report", "extra"}, + {"--config", cfgPath, "wiretap", "extra"}, + {"--config", cfgPath, "wiretap", "--max-file-bytes", "0"}, + {"--config", cfgPath, "sync", "--source", "bogus"}, + {"--config", cfgPath, "sync", "--since", "not-time"}, + {"--config", cfgPath, "sync", "--no-update", "--update", "force"}, + {"--config", cfgPath, "publish", "--remote", ""}, + {"--config", cfgPath, "subscribe"}, + {"--config", cfgPath, "update", "extra"}, + {"--config", cfgPath, "sql", "--confirm", "select 1"}, + {"--config", cfgPath, "sql", "--unsafe", "select 1"}, + {"--config", cfgPath, "members"}, + {"--config", cfgPath, "members", "wat"}, + } + for _, args := range cases { + var stdout, stderr bytes.Buffer + err := Run(ctx, args, &stdout, &stderr) + require.Error(t, err, args) + } +} + +func TestOutputBranches(t *testing.T) { + now := time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC) + values := []any{ + syncRunStats{ + 
Source: "both", + Discord: &syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5}, + Wiretap: &discorddesktop.Stats{ + Path: "/tmp/discord", + FilesVisited: 1, + FilesScanned: 2, + FilesSkipped: 3, + FilesUnchanged: 4, + CacheFilesFastSkipped: 5, + JSONObjects: 6, + Guilds: 7, + Channels: 8, + Messages: 9, + DMMessages: 10, + DMChannels: 11, + GuildMessages: 12, + SkippedMessages: 13, + SkippedChannels: 14, + Checkpoints: 15, + FullCache: true, + DryRun: true, + }, + }, + syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5}, + discorddesktop.Stats{Path: "/tmp/discord", FilesVisited: 1, FullCache: true, DryRun: true}, + store.EmbeddingDrainStats{ + Processed: 3, + Succeeded: 2, + Failed: 1, + Requeued: 4, + RateLimited: true, + RemainingBacklog: 5, + Provider: "openai", + Model: "model", + InputVersion: "v1", + }, + []store.DirectMessageConversationRow{{ + ChannelID: "c1", + Name: "Alice", + MessageCount: 2, + AuthorCount: 1, + FirstMessageAt: now.Add(-time.Hour), + LastMessageAt: now, + }}, + store.MemberProfile{ + Member: store.MemberRow{ + GuildID: "g1", + UserID: "u1", + Username: "peter", + DisplayName: "Peter", + JoinedAt: now, + XHandle: "steipete", + GitHubLogin: "steipete", + Website: "https://steipete.me", + Pronouns: "he/him", + Location: "Vienna", + Bio: "Maintainer", + URLs: []string{"https://example.com"}, + }, + MessageCount: 1, + FirstMessageAt: now.Add(-time.Hour), + LastMessageAt: now, + RecentMessages: []store.MessageRow{{ChannelName: "general", CreatedAt: now, Content: "hello"}}, + }, + report.Digest{ + Since: now.Add(-24 * time.Hour), + Until: now, + WindowLabel: "1d", + Channels: []report.ChannelDigest{{ + ChannelID: "c1", + ChannelName: "general", + Kind: "text", + GuildID: "g1", + Messages: 3, + Replies: 1, + ActiveAuthors: 2, + TopPosters: []report.RankedCount{{Name: "Peter", Count: 2}}, + TopMentions: []report.RankedCount{{Count: 1}}, + }}, + Totals: report.DigestTotals{Messages: 3, Replies: 
1, Channels: 1, ActiveAuthors: 2}, + }, + report.Quiet{ + Since: now.Add(-24 * time.Hour), + Until: now, + Channels: []report.QuietChannel{{ + ChannelID: "c1", + ChannelName: "general", + Kind: "text", + LastMessage: "", + DaysSilent: -1, + }}, + Totals: report.QuietTotals{Channels: 1}, + }, + report.Trends{ + Since: now.AddDate(0, 0, -14), + Until: now, + Weeks: 2, + Rows: []report.TrendsRow{{ + ChannelID: "c1", + ChannelName: "general", + Kind: "text", + GuildID: "g1", + Weekly: []report.WeeklyCount{ + {WeekStart: now.AddDate(0, 0, -14), Messages: 1}, + {WeekStart: now.AddDate(0, 0, -7), Messages: 2}, + }, + }}, + }, + map[string]any{"b": 2, "a": 1}, + } + for _, value := range values { + var out bytes.Buffer + require.NoError(t, printHuman(&out, value)) + require.NotEmpty(t, out.String()) + } + + var plain bytes.Buffer + require.NoError(t, printPlain(&plain, report.Quiet{Channels: []report.QuietChannel{{ChannelID: "c1", ChannelName: "general", Kind: "text", GuildID: "g1", LastMessage: "now", DaysSilent: 0}}})) + require.NoError(t, printPlain(&plain, report.Trends{Rows: []report.TrendsRow{{GuildID: "g1", ChannelID: "c1", ChannelName: "general", Kind: "text", Weekly: []report.WeeklyCount{{WeekStart: now, Messages: 2}}}}})) + require.Error(t, printPlain(io.Discard, struct{}{})) + require.Error(t, printHuman(io.Discard, struct{}{})) + require.Equal(t, "this is a profile field with a very l...", trimForTable("this is a profile field with a very long text value")) } func TestStatusSearchSQLAndListings(t *testing.T) { @@ -1767,7 +1957,49 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) { s, err := store.Open(ctx, dbPath) require.NoError(t, err) require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`})) + require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "dm1", GuildID: store.DirectMessageGuildID, Kind: "dm", Name: "Alice", RawJSON: `{}`})) require.NoError(t, 
s.UpsertMember(ctx, store.MemberRecord{GuildID: "g1", UserID: "u1", Username: "peter", RoleIDsJSON: `[]`, RawJSON: `{}`})) + base := time.Date(2026, 3, 8, 10, 0, 0, 0, time.UTC) + require.NoError(t, s.UpsertMessages(ctx, []store.MessageMutation{ + { + Record: store.MessageRecord{ + ID: "m1", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "peter", + CreatedAt: base.Format(time.RFC3339Nano), + Content: "hello <@u1> in <#c1>", + NormalizedContent: "hello <@u1> in <#c1>", + RawJSON: `{"author":{"username":"peter"}}`, + }, + Mentions: []store.MentionEventRecord{{ + MessageID: "m1", + GuildID: "g1", + ChannelID: "c1", + AuthorID: "u1", + TargetType: "user", + TargetID: "u1", + TargetName: "peter", + EventAt: base.Format(time.RFC3339Nano), + }}, + }, + { + Record: store.MessageRecord{ + ID: "dm-msg", + GuildID: store.DirectMessageGuildID, + ChannelID: "dm1", + ChannelName: "Alice", + AuthorID: "u2", + AuthorName: "Alice", + CreatedAt: base.Add(time.Minute).Format(time.RFC3339Nano), + Content: "private hello", + NormalizedContent: "private hello", + RawJSON: `{"source":"discord_desktop"}`, + }, + }, + })) require.NoError(t, s.Close()) rt := &runtime{ @@ -1787,11 +2019,23 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) { require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--hours", "6", "--last", "1"})) require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all"})) require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all", "--include-empty"})) + require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--limit", "1"})) + require.NoError(t, rt.runMessages([]string{"--dm", "--channel", "Alice", "--last", "1"})) + require.NoError(t, rt.runDirectMessages([]string{"--list"})) + require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--search", 
"private", "--limit", "1"})) + require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--all"})) require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--target", "u2"})) + require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--days", "7", "--type", "user"})) + require.NoError(t, rt.runDigest([]string{"--since", "12h", "--channel", "general", "--top-n", "2"})) + require.NoError(t, rt.runReport([]string{"--readme", filepath.Join(dir, "README.md")})) require.NoError(t, rt.runSearch([]string{"--include-empty", "Peter"})) require.NoError(t, rt.runChannels([]string{"show", "c1"})) require.NoError(t, rt.runChannels([]string{"list"})) require.NoError(t, rt.runStatus(nil)) + require.NoError(t, rt.runAnalytics([]string{})) + require.NoError(t, rt.runTUI([]string{"--json", "--limit", "1", "--include-empty"})) + require.NoError(t, rt.runAnalytics([]string{"quiet", "--since", "1d"})) + require.NoError(t, rt.runAnalytics([]string{"trends", "--weeks", "1", "--channel", "general"})) return nil })) } diff --git a/internal/discorddesktop/import_helpers_test.go b/internal/discorddesktop/import_helpers_test.go index ba1632a..911705d 100644 --- a/internal/discorddesktop/import_helpers_test.go +++ b/internal/discorddesktop/import_helpers_test.go @@ -1,9 +1,12 @@ package discorddesktop import ( + "context" "os" "path/filepath" + "runtime" "testing" + "time" "github.com/stretchr/testify/require" @@ -90,3 +93,106 @@ func TestCacheFileHasRouteHint(t *testing.T) { _, err = cacheFileHasRouteHint(root, "missing") require.Error(t, err) } + +func TestImportAndStateEdgeBranches(t *testing.T) { + ctx := context.Background() + _, err := Import(ctx, nil, Options{}) + require.ErrorContains(t, err, "store is required") + + configHome := t.TempDir() + t.Setenv("XDG_CONFIG_HOME", configHome) + if runtime.GOOS == "linux" { + require.Equal(t, filepath.Join(configHome, "discord"), 
DefaultPath()) + } + + dir := t.TempDir() + s, err := store.Open(ctx, filepath.Join(dir, "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + stats, err := Import(ctx, s, Options{ + Path: dir, + Now: func() time.Time { return time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC) }, + }) + require.NoError(t, err) + require.Equal(t, dir, stats.Path) + require.Equal(t, 1, stats.Checkpoints) + + stats, err = Import(ctx, nil, Options{Path: filepath.Join(dir, "missing"), DryRun: true}) + require.NoError(t, err) + require.True(t, stats.DryRun) + + stats, err = Import(ctx, nil, Options{Path: dir, DryRun: true, FullCache: true}) + require.NoError(t, err) + require.True(t, stats.FullCache) + + require.NoError(t, s.SetSyncState(ctx, fileIndexScope(Options{}), "{not-json")) + require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`})) + state, err := loadScanState(ctx, s, Options{}) + require.NoError(t, err) + require.Empty(t, state.previous) + require.Equal(t, "general", state.channels["c1"].Name) +} + +func TestSnapshotFinalizeAndCommitBranches(t *testing.T) { + ctx := context.Background() + s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + snap := newSnapshot() + snap.messages["m-missing"] = store.MessageMutation{ + Record: store.MessageRecord{ID: "m-missing", ChannelID: "c-missing", RawJSON: `{}`}, + } + snap.messages["m-known"] = store.MessageMutation{ + Record: store.MessageRecord{ID: "m-known", GuildID: "g1", ChannelID: "c1", ChannelName: "general", RawJSON: `{}`}, + } + stats := &Stats{} + totals := newScanTotals() + unresolved := finalizeSnapshot(snap, map[string]store.ChannelRecord{ + "c1": {ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}, + }, totals, stats, true) + require.Equal(t, unresolvedMessages{"m-missing": "c-missing"}, unresolved) + require.Equal(t, 1, 
stats.Messages) + require.Equal(t, 1, stats.SkippedMessages) + require.Equal(t, "general", snap.channels["c1"].Name) + require.Equal(t, "g1", snap.guilds["g1"].ID) + + more := unresolvedMessages{"m2": "c2"} + mergeUnresolved(unresolved, more) + recordUnresolved(unresolved, totals, stats) + require.Equal(t, 2, stats.SkippedMessages) + + state := scanState{current: map[string]fileFingerprint{}} + candidates := []fileCandidate{{relKey: "Cache_Data/entry", fingerprint: fileFingerprint{Size: 10, ModUnixNS: 20}}} + require.NoError(t, commitSnapshot(ctx, s, Options{DryRun: true}, state, candidates, newSnapshot(), true, stats)) + require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), false, stats)) + require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), true, stats)) + require.True(t, isImportedFingerprint(state.current["Cache_Data/entry"])) + + require.NoError(t, checkpointScannedCandidates(ctx, s, Options{DryRun: true}, state, candidates, stats)) + require.NoError(t, checkpointScannedCandidates(ctx, s, Options{}, state, candidates, stats)) +} + +func TestRouteHintCollectionBranches(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "route"), []byte("https://discord.com/channels/123456789012/111111111111111121"), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "plain"), []byte("plain"), 0o600)) + + root, err := os.OpenRoot(dir) + require.NoError(t, err) + defer func() { _ = root.Close() }() + + snap := newSnapshot() + err = collectCacheRouteHints(context.Background(), root, []fileCandidate{ + {relPath: "missing"}, + {relPath: "plain"}, + {relPath: "route"}, + }, snap) + require.NoError(t, err) + require.Equal(t, "123456789012", snap.routes["111111111111111121"]) + + canceled, cancel := context.WithCancel(context.Background()) + cancel() + require.ErrorIs(t, collectCacheRouteHints(canceled, root, []fileCandidate{{relPath: "route"}}, newSnapshot()), 
context.Canceled) +} diff --git a/internal/discorddesktop/import_value_helpers_test.go b/internal/discorddesktop/import_value_helpers_test.go index 30943cc..c76abbe 100644 --- a/internal/discorddesktop/import_value_helpers_test.go +++ b/internal/discorddesktop/import_value_helpers_test.go @@ -3,8 +3,11 @@ package discorddesktop import ( "encoding/json" "testing" + "time" "github.com/stretchr/testify/require" + + "github.com/openclaw/discrawl/internal/store" ) func TestPrimitiveValueHelpers(t *testing.T) { @@ -78,3 +81,85 @@ func TestDiscordValueFormatHelpers(t *testing.T) { require.Equal(t, "desktop", kindForChannelType(16, false)) require.Equal(t, "text", kindForChannelType(0, false)) } + +func TestDiscordMessagePayloadHelpers(t *testing.T) { + raw := map[string]any{ + "id": "333333333333333333", + "channel_id": "111111111111111111", + "guild_id": "999999999999999999", + "type": float64(0), + "timestamp": "2026-05-08T12:00:00Z", + "edited_timestamp": "2026-05-08T12:05:00Z", + "content": "hello\u200b\nworld", + "message_reference": map[string]any{"message_id": "222222222222222222"}, + "author": map[string]any{ + "id": "444444444444444444", + "username": "peter", + "global_name": "Peter", + "display_name": "Peter S", + "discriminator": "0", + "bot": true, + }, + "attachments": []any{ + map[string]any{"filename": "trace.txt", "content_type": "text/plain", "size": float64(12), "url": "https://cdn.example/trace.txt"}, + map[string]any{"id": "att2"}, + "ignored", + }, + "mentions": []any{ + map[string]any{"id": "555555555555555555", "username": "alice", "global_name": "Alice"}, + map[string]any{"username": "missing"}, + }, + "embeds": []any{ + map[string]any{"title": "Deploy", "description": "Ready"}, + map[string]any{"title": " "}, + }, + } + at := parseDiscordTime("2026-05-08T12:00:00Z") + attachments := parseAttachments(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444") + require.Len(t, attachments, 2) + require.Equal(t, 
"333333333333333333:0", attachments[0].AttachmentID) + require.Equal(t, "trace.txt", attachments[0].Filename) + require.Equal(t, "att2", attachments[1].Filename) + require.Equal(t, []string{"trace.txt", "att2"}, attachmentText(attachments)) + + mentions := parseMentions(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444", at) + require.Equal(t, []store.MentionEventRecord{{ + MessageID: "333333333333333333", + GuildID: "999999999999999999", + ChannelID: "111111111111111111", + AuthorID: "444444444444444444", + TargetType: "user", + TargetID: "555555555555555555", + TargetName: "Alice", + EventAt: at.Format(time.RFC3339Nano), + }}, mentions) + + require.Equal(t, []string{"Deploy", "Ready"}, embedText(raw)) + require.Equal(t, "helloworld\ntrace.txt\natt2\nDeploy\nReady", normalizeText(raw["content"], attachmentText(attachments), embedText(raw))) + require.Equal(t, "hidden text", cleanText("\u200bhidden\x00 text\n")) + require.Equal(t, "222222222222222222", messageReferenceID(raw)) + require.Empty(t, messageReferenceID(map[string]any{})) + + require.Contains(t, syntheticGuild("g1", "Guild").RawJSON, "discord_desktop") + require.Equal(t, "dm", syntheticChannel("c1", DirectMessageGuildID, "Alice").Kind) + require.Equal(t, "group_dm", syntheticChannel("c2", DirectMessageGuildID, "Alice, Bob").Kind) + require.Equal(t, "channel-123456", syntheticChannel("123456123456", "g1", "").Name) + require.Contains(t, channelRawJSON(raw, "c1", "g1", "general", "text"), `"kind":"text"`) + require.Contains(t, messageRawJSON(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444"), "desktop_cache_note") + require.Equal(t, "Alice, Bob", recipientLabel([]any{ + map[string]any{"username": "Bob"}, + map[string]any{"global_name": "Alice"}, + map[string]any{}, + })) + + require.True(t, parseDiscordTime("2026-05-08T12:00:00.123Z").Equal(time.Date(2026, 5, 8, 12, 0, 0, 123000000, time.UTC))) + require.True(t, 
parseDiscordTime("bad").IsZero()) + require.True(t, parseDiscordTime("").IsZero()) + require.False(t, snowflakeTime("175928847299117063").IsZero()) + require.True(t, snowflakeTime("bad").IsZero()) + require.Empty(t, formatOptionalTime(time.Time{})) + require.Equal(t, "2026-05-08T12:00:00Z", formatOptionalTime(at)) + require.True(t, looksSnowflake("123456789012")) + require.False(t, looksSnowflake("123")) + require.False(t, looksSnowflake("12345678901x")) +} diff --git a/internal/embed/provider_test.go b/internal/embed/provider_test.go index e700b32..e22c9ee 100644 --- a/internal/embed/provider_test.go +++ b/internal/embed/provider_test.go @@ -304,6 +304,74 @@ func TestProviderOptionsAndProbeDecisions(t *testing.T) { require.False(t, shouldProbe(providerSettings{Name: ProviderOpenAICompatible, BaseURL: "https://api.example.com/v1"})) } +func TestProviderValidationEdges(t *testing.T) { + t.Parallel() + + _, err := resolveProviderConfig(config.EmbeddingsConfig{ + Provider: ProviderOllama, + RequestTimeout: "not-a-duration", + }, true) + require.ErrorContains(t, err, "parse embeddings request_timeout") + + _, err = resolveProviderConfig(config.EmbeddingsConfig{ + Provider: ProviderOllama, + RequestTimeout: "0s", + }, true) + require.ErrorContains(t, err, "must be positive") + + _, err = resolveProviderConfig(config.EmbeddingsConfig{ + Provider: ProviderOllama, + BaseURL: "://bad", + }, true) + require.ErrorContains(t, err, "invalid embeddings base_url") + + key, err := resolveAPIKey(ProviderOpenAICompatible, "MISSING_EMBED_KEY", false) + require.NoError(t, err) + require.Empty(t, key) + + _, err = newProvider(providerSettings{Name: "bogus"}) + require.ErrorContains(t, err, "unsupported embedding provider") + + require.Equal(t, []string{"abc"}, trimInputs([]string{"abc"}, 0)) + _, err = inferDimensions([][]float32{{}}) + require.ErrorContains(t, err, "empty vector") +} + +func TestOllamaProviderResponseEdges(t *testing.T) { + t.Parallel() + + countServer := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/api/embed", r.URL.Path) + _, _ = w.Write([]byte(`{"embeddings":[]}`)) + })) + defer countServer.Close() + + provider := newOllamaProvider(providerSettings{ + HTTPClient: countServer.Client(), + BaseURL: countServer.URL, + Model: "fallback-model", + MaxInputChars: 10, + }) + _, err := provider.Embed(context.Background(), []string{"one"}) + require.ErrorContains(t, err, "returned 0 vectors for 1 inputs") + + modelServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/api/embed", r.URL.Path) + _, _ = w.Write([]byte(`{"embeddings":[[1,2]]}`)) + })) + defer modelServer.Close() + + provider = newOllamaProvider(providerSettings{ + HTTPClient: modelServer.Client(), + BaseURL: modelServer.URL, + Model: "fallback-model", + MaxInputChars: 10, + }) + batch, err := provider.Embed(context.Background(), []string{"one"}) + require.NoError(t, err) + require.Equal(t, "fallback-model", batch.Model) +} + func TestCheckProviderSkipsRemoteCompatibleProbe(t *testing.T) { t.Parallel() diff --git a/internal/store/store_test.go b/internal/store/store_test.go index 2d1e183..77672fd 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -389,6 +389,99 @@ func TestStoreReadWriteAndSearch(t *testing.T) { require.Equal(t, "Peter", messageRows[0].AuthorName) } +func TestListMessagesWithThreadContextAndMentionLabels(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + base := time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC) + require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`})) + require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`})) + require.NoError(t, 
s.UpsertChannel(ctx, ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "other", RawJSON: `{}`})) + require.NoError(t, s.UpsertMember(ctx, MemberRecord{ + GuildID: "g1", + UserID: "u1", + Username: "alice", + DisplayName: "Alice", + RoleIDsJSON: `[]`, + RawJSON: `{}`, + })) + require.NoError(t, s.UpsertMember(ctx, MemberRecord{ + GuildID: "g2", + UserID: "u1", + Username: "other-alice", + DisplayName: "Other Alice", + RoleIDsJSON: `[]`, + RawJSON: `{}`, + })) + require.NoError(t, s.UpsertMessages(ctx, []MessageMutation{ + { + Record: MessageRecord{ + ID: "root", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "Alice", + CreatedAt: base.Format(time.RFC3339Nano), + Content: "root mentions <@u1> and <#c1>", + NormalizedContent: "root mentions <@u1> and <#c1>", + RawJSON: `{}`, + }, + Mentions: []MentionEventRecord{{ + MessageID: "root", + GuildID: "g1", + ChannelID: "c1", + AuthorID: "u1", + TargetType: "role", + TargetID: "r1", + TargetName: "Maintainers", + EventAt: base.Format(time.RFC3339Nano), + }}, + }, + { + Record: MessageRecord{ + ID: "reply", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "Alice", + CreatedAt: base.Add(time.Minute).Format(time.RFC3339Nano), + Content: "reply to root <@&r1>", + NormalizedContent: "reply to root <@&r1>", + ReplyToMessageID: "root", + RawJSON: `{}`, + }, + Mentions: []MentionEventRecord{{ + MessageID: "reply", + GuildID: "g1", + ChannelID: "c1", + AuthorID: "u1", + TargetType: "role", + TargetID: "r1", + TargetName: "Maintainers", + EventAt: base.Add(time.Minute).Format(time.RFC3339Nano), + }}, + }, + })) + + rows, err := s.ListMessagesWithThreadContext(ctx, MessageListOptions{Channel: "general", Since: base.Add(30 * time.Second), Limit: 1}) + require.NoError(t, err) + require.Equal(t, []string{"reply", "root"}, messageRowIDs(rows)) + require.Equal(t, "reply to root @Maintainers", rows[0].DisplayContent) + require.Equal(t, "root 
mentions @Alice and #general", rows[1].DisplayContent) + + merged := mergeMessageRows(rows[:1], []MessageRow{rows[0], {MessageID: "other", GuildID: "g1", ChannelID: "c1"}}) + require.Equal(t, []string{"reply", "other"}, messageRowIDs(merged)) + require.Equal(t, "@fallback", replaceDiscordMention("<@missing>", "user", "missing", "fallback")) + require.Equal(t, "#chan", replaceDiscordMention("<#c1>", "channel", "c1", "chan")) + require.Equal(t, "<@u2>", replaceDiscordMention("<@u2>", "user", "", "blank")) +} + func TestSearchMessagesPrefersRecentMessageIDs(t *testing.T) { t.Parallel() @@ -843,6 +936,14 @@ func searchResultIDs(results []SearchResult) []string { return ids } +func messageRowIDs(rows []MessageRow) []string { + ids := make([]string, 0, len(rows)) + for _, row := range rows { + ids = append(ids, row.MessageID) + } + return ids +} + func TestCheckMessageFTSProbe(t *testing.T) { t.Parallel() @@ -937,6 +1038,33 @@ func TestOpenSetsSchemaVersion(t *testing.T) { require.Equal(t, storeSchemaVersion, version) } +func TestOpenReadOnlySchemaChecks(t *testing.T) { + t.Parallel() + + ctx := context.Background() + dbPath := filepath.Join(t.TempDir(), "discrawl.db") + s, err := Open(ctx, dbPath) + require.NoError(t, err) + require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`})) + require.NoError(t, s.Close()) + + ro, err := OpenReadOnly(ctx, dbPath) + require.NoError(t, err) + status, err := ro.Status(ctx, dbPath, "") + require.NoError(t, err) + require.Equal(t, 1, status.GuildCount) + require.NoError(t, ro.Close()) + + future, err := sql.Open("sqlite", dbPath) + require.NoError(t, err) + _, err = future.ExecContext(ctx, `pragma user_version = 999`) + require.NoError(t, err) + require.NoError(t, future.Close()) + + _, err = OpenReadOnly(ctx, dbPath) + require.ErrorContains(t, err, "database schema version mismatch") +} + func TestOpenFailsOnFutureSchemaVersion(t *testing.T) { t.Parallel() diff --git 
a/internal/syncer/message_sync_helpers_test.go b/internal/syncer/message_sync_helpers_test.go new file mode 100644 index 0000000..f7dbc00 --- /dev/null +++ b/internal/syncer/message_sync_helpers_test.go @@ -0,0 +1,214 @@ +package syncer + +import ( + "context" + "errors" + "log/slog" + "path/filepath" + "testing" + "time" + + "github.com/bwmarrin/discordgo" + "github.com/stretchr/testify/require" + + "github.com/openclaw/discrawl/internal/store" +) + +func TestMessageChannelSelectionAndTimeoutHelpers(t *testing.T) { + t.Parallel() + + parent := &discordgo.Channel{ID: "forum", GuildID: "g1", Name: "forum", Type: discordgo.ChannelTypeGuildForum} + thread := &discordgo.Channel{ID: "thread", GuildID: "g1", ParentID: "forum", Name: "thread", Type: discordgo.ChannelTypeGuildPublicThread} + text := &discordgo.Channel{ID: "text", GuildID: "g1", Name: "text", Type: discordgo.ChannelTypeGuildText} + voice := &discordgo.Channel{ID: "voice", GuildID: "g1", Name: "voice", Type: discordgo.ChannelTypeGuildVoice} + + rows := filterMessageChannels([]*discordgo.Channel{nil, parent, thread, text, voice}, []string{"forum"}) + require.Equal(t, []string{"thread"}, channelIDs(rows)) + require.False(t, requestedMessageTarget(nil, nil, map[string]struct{}{})) + require.True(t, requestedMessageTarget(text, map[string]*discordgo.Channel{"text": text}, map[string]struct{}{"text": {}})) + require.False(t, requestedMessageTarget(thread, map[string]*discordgo.Channel{}, map[string]struct{}{"forum": {}})) + + ctx, cancel := (*Syncer)(nil).messageChannelContext(context.Background()) + require.NoError(t, ctx.Err()) + cancel() + require.ErrorIs(t, ctx.Err(), context.Canceled) + + svc := New(&fakeClient{}, nil, nil) + svc.messageChannelTimeout = time.Second + ctx, cancel = svc.messageChannelContext(context.Background()) + defer cancel() + _, ok := ctx.Deadline() + require.True(t, ok) + + parentCtx, parentCancel := context.WithDeadline(context.Background(), time.Now().Add(time.Hour)) + defer 
parentCancel() + ctx, cancel = svc.messageChannelContext(parentCtx) + defer cancel() + deadline, ok := ctx.Deadline() + require.True(t, ok) + parentDeadline, _ := parentCtx.Deadline() + require.Equal(t, parentDeadline, deadline) +} + +func TestChannelSyncStateHelpers(t *testing.T) { + t.Parallel() + + channel := &discordgo.Channel{ID: "c1", LastMessageID: "200"} + require.False(t, shouldSkipChannelSync(nil, channelSyncState{BackfillComplete: true})) + require.True(t, shouldSkipChannelSync(&discordgo.Channel{ID: "c1"}, channelSyncState{BackfillComplete: true, Latest: ""})) + require.False(t, shouldSkipChannelSync(channel, channelSyncState{BackfillComplete: true, Latest: ""})) + require.True(t, shouldSkipChannelSync(channel, channelSyncState{BackfillComplete: true, Latest: "300"})) + require.False(t, shouldSkipLatestOnlyChannelSync(nil, channelSyncState{Latest: "300"})) + require.False(t, shouldSkipLatestOnlyChannelSync(channel, channelSyncState{})) + require.True(t, shouldSkipLatestOnlyChannelSync(channel, channelSyncState{Latest: "300"})) + + messages := []*discordgo.Message{ + {ID: "3", Timestamp: time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC)}, + {ID: "2", Timestamp: time.Date(2026, 5, 8, 11, 0, 0, 0, time.UTC)}, + {ID: "1", Timestamp: time.Date(2026, 5, 8, 10, 0, 0, 0, time.UTC)}, + } + filtered, reached := filterMessagesSince(messages, time.Date(2026, 5, 8, 10, 30, 0, 0, time.UTC)) + require.True(t, reached) + require.Equal(t, []string{"3", "2"}, messageIDs(filtered)) + filtered, reached = filterMessagesSince(messages, time.Time{}) + require.False(t, reached) + require.Len(t, filtered, 3) +} + +func TestChannelSyncStateStoreHelpers(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`})) + 
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{ + ID: "100", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "User", + CreatedAt: time.Now().UTC().Format(time.RFC3339Nano), + Content: "hello", + NormalizedContent: "hello", + RawJSON: `{}`, + })) + + svc := New(&fakeClient{}, s, nil) + state := channelSyncState{} + require.NoError(t, svc.seedChannelSyncState(ctx, "c1", &state)) + require.Equal(t, "100", state.Latest) + require.Equal(t, "100", state.BackfillCursor) + + state = channelSyncState{StoredLatest: "100"} + require.NoError(t, svc.seedChannelSyncState(ctx, "missing-channel", &state)) + require.True(t, state.BackfillComplete) + + require.NoError(t, s.SetSyncState(ctx, channelLatestScope("c1"), "200")) + require.NoError(t, s.SetSyncState(ctx, channelBackfillScope("c1"), "100")) + require.NoError(t, s.SetSyncState(ctx, channelHistoryCompleteScope("c1"), "1")) + loaded, err := svc.loadChannelSyncState(ctx, "c1") + require.NoError(t, err) + require.Equal(t, channelSyncState{Latest: "200", StoredLatest: "200", BackfillCursor: "100", BackfillComplete: true}, loaded) +} + +func TestMessageChannelSyncBranches(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + svc := New(&fakeClient{}, s, nil) + count, err := svc.syncMessageChannels(ctx, "g1", nil, SyncOptions{}) + require.NoError(t, err) + require.Zero(t, count) + require.NoError(t, svc.clearUnavailableChannel(ctx, "")) + require.NoError(t, (*Syncer)(nil).clearUnavailableChannel(ctx, "c1")) + + channel := &discordgo.Channel{ID: "c1", GuildID: "g1", Name: "general", Type: discordgo.ChannelTypeGuildText} + client := &fakeClient{ + messages: map[string][]*discordgo.Message{ + "c1": {{ + ID: "100", + GuildID: "g1", + ChannelID: "c1", + Content: "hello", + Timestamp: time.Now().UTC(), + Author: &discordgo.User{ID: "u1", 
Username: "user"}, + }}, + }, + } + svc = New(client, s, nil) + count, err = svc.syncMessageChannelsSerial(ctx, "g1", []*discordgo.Channel{channel}, SyncOptions{Full: true}, nil) + require.NoError(t, err) + require.Equal(t, 1, count) + + errChannel := &discordgo.Channel{ID: "c-err", GuildID: "g1", Name: "errors", Type: discordgo.ChannelTypeGuildText} + client.messageErrors = map[string]error{"c-err": errors.New(`HTTP 500 Internal Server Error`)} + count, err = svc.syncMessageChannelsSerial(ctx, "g1", []*discordgo.Channel{errChannel}, SyncOptions{Full: true}, nil) + require.NoError(t, err) + require.Zero(t, count) + + client.messageErrors = map[string]error{"c-err": errors.New("hard failure")} + count, err = svc.syncMessageChannelsSerial(ctx, "g1", []*discordgo.Channel{errChannel}, SyncOptions{Full: true}, nil) + require.ErrorContains(t, err, "sync channel c-err") + require.Zero(t, count) +} + +func TestMessageChannelConcurrentErrorAndProgressBranches(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + channels := []*discordgo.Channel{ + {ID: "c1", GuildID: "g1", Name: "one", Type: discordgo.ChannelTypeGuildText}, + {ID: "c2", GuildID: "g1", Name: "two", Type: discordgo.ChannelTypeGuildText}, + } + client := &fakeClient{ + messages: map[string][]*discordgo.Message{ + "c1": {{ + ID: "101", + GuildID: "g1", + ChannelID: "c1", + Content: "one", + Timestamp: time.Now().UTC(), + Author: &discordgo.User{ID: "u1", Username: "user"}, + }}, + }, + messageErrors: map[string]error{"c2": errors.New("hard failure")}, + } + svc := New(client, s, slog.New(slog.DiscardHandler)) + count, err := svc.syncMessageChannelsConcurrent(ctx, "g1", channels, SyncOptions{Full: true}, 2, newMessageSyncProgress(svc, "g1", len(channels), SyncOptions{Full: true, Concurrency: 2})) + require.ErrorContains(t, err, "sync channel c2") + require.Equal(t, 1, 
count) + + progress := &messageSyncProgress{} + progress.start(nil) + progress.touch(nil, 1) + progress.finish(nil) + progress.logWaitHeartbeat() + require.Equal(t, "skipped", syncErrorOutcome(errors.New("plain"))) +} + +func channelIDs(channels []*discordgo.Channel) []string { + out := make([]string, 0, len(channels)) + for _, channel := range channels { + out = append(out, channel.ID) + } + return out +} + +func messageIDs(messages []*discordgo.Message) []string { + out := make([]string, 0, len(messages)) + for _, message := range messages { + out = append(out, message.ID) + } + return out +}