383 lines
16 KiB
Go
383 lines
16 KiB
Go
package discorddesktop
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
"github.com/steipete/discrawl/internal/store"
|
|
)
|
|
|
|
func TestImportFastCacheSkipsUnroutedCacheDataUnlessFullCache(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`
|
|
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"slow-cache"}
|
|
{"id":"333333333333333346","channel_id":"111111111111111121","content":"unrouted historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`), 0o600))
|
|
|
|
fastStore, err := store.Open(ctx, filepath.Join(dir, "fast.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = fastStore.Close() }()
|
|
|
|
stats, err := Import(ctx, fastStore, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.CacheFilesFastSkipped)
|
|
require.Equal(t, 0, stats.Messages)
|
|
|
|
results, err := fastStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Empty(t, results)
|
|
|
|
stats, err = Import(ctx, fastStore, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 0, stats.CacheFilesFastSkipped)
|
|
require.Equal(t, 1, stats.FilesUnchanged)
|
|
|
|
stats, err = Import(ctx, fastStore, Options{Path: dir, FullCache: true})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Messages)
|
|
|
|
fullStore, err := store.Open(ctx, filepath.Join(dir, "full.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = fullStore.Close() }()
|
|
|
|
stats, err = Import(ctx, fullStore, Options{Path: dir, FullCache: true})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 0, stats.CacheFilesFastSkipped)
|
|
require.Equal(t, 1, stats.Messages)
|
|
|
|
results, err = fullStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Len(t, results, 1)
|
|
require.Equal(t, "slow-cache", results[0].ChannelName)
|
|
}
|
|
|
|
func TestImportCheckpointsCacheBatches(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
for i := range checkpointEveryFiles + 1 {
|
|
channelID := "111111111111111121"
|
|
messageID := 333333333333333346 + i
|
|
body := []byte(fmt.Sprintf(`https://discord.com/channels/999999999999999996/%s
|
|
{"id":"%d","channel_id":"%s","content":"checkpoint cache %d","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, channelID, messageID, channelID, i))
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), body, 0o600))
|
|
}
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, checkpointEveryFiles+1, stats.FilesScanned)
|
|
require.Equal(t, checkpointEveryFiles+1, stats.Messages)
|
|
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
|
}
|
|
|
|
func TestImportUsesLaterCacheMetadataBeforeCheckpointingEarlierBatch(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
channelID := "111111111111111121"
|
|
guildID := "999999999999999996"
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), []byte(fmt.Sprintf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"333333333333333346","channel_id":"%s","content":"needs later channel metadata","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, channelID, channelID)), 0o600))
|
|
for i := 1; i < checkpointEveryFiles; i++ {
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), []byte(fmt.Sprintf(
|
|
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
|
|
channelID,
|
|
)), 0o600))
|
|
}
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), []byte(fmt.Sprintf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"%s","guild_id":"%s","type":0,"name":"later-metadata"}
|
|
`, channelID, channelID, guildID)), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Messages)
|
|
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
|
|
|
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "needs later channel metadata", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Len(t, results, 1)
|
|
require.Equal(t, guildID, results[0].GuildID)
|
|
require.Equal(t, "later-metadata", results[0].ChannelName)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
}
|
|
|
|
func TestImportCheckpointsPartiallyResolvedRetryBatch(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
resolvedChannelID := "111111111111111121"
|
|
unresolvedChannelID := "111111111111111122"
|
|
guildID := "999999999999999996"
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), []byte(fmt.Sprintf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"333333333333333346","channel_id":"%s","content":"partially resolved retry message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
{"id":"333333333333333347","channel_id":"%s","content":"still unresolved retry message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID)), 0o600))
|
|
for i := 1; i < checkpointEveryFiles; i++ {
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), []byte(fmt.Sprintf(
|
|
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
|
|
resolvedChannelID,
|
|
)), 0o600))
|
|
}
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), []byte(fmt.Sprintf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"%s","guild_id":"%s","type":0,"name":"partially-resolved"}
|
|
`, resolvedChannelID, resolvedChannelID, guildID)), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Messages)
|
|
require.Equal(t, 1, stats.SkippedMessages)
|
|
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
|
|
|
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "partially resolved retry", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Len(t, results, 1)
|
|
require.Equal(t, "partially-resolved", results[0].ChannelName)
|
|
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "still unresolved retry", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Empty(t, results)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
}
|
|
|
|
func TestImportCheckpointsUnresolvableRouteBearingCacheMisses(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
channelID := "111111111111111121"
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), []byte(fmt.Sprintf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"333333333333333346","channel_id":"%s","content":"permanent unresolved cache miss","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, channelID, channelID)), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.SkippedMessages)
|
|
require.Equal(t, 1, stats.Checkpoints)
|
|
|
|
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "permanent unresolved", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Empty(t, results)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.FilesUnchanged)
|
|
}
|
|
|
|
func TestImportDoesNotAppendEventsForSkippedMixedBatch(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
guildID := "999999999999999996"
|
|
resolvedChannelID := "111111111111111121"
|
|
unresolvedChannelID := "111111111111111122"
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), []byte(fmt.Sprintf(`https://discord.com/channels/%s/%s
|
|
https://discord.com/api/v9/channels/%s/messages?limit=50
|
|
{"id":"333333333333333346","channel_id":"%s","content":"mixed resolved message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
{"id":"333333333333333347","channel_id":"%s","content":"mixed unresolved message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, guildID, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID)), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Checkpoints)
|
|
requireMessageCount(t, ctx, st, "message_events", 0)
|
|
|
|
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "mixed resolved", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Len(t, results, 1)
|
|
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "mixed unresolved", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Empty(t, results)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.FilesUnchanged)
|
|
requireMessageCount(t, ctx, st, "message_events", 0)
|
|
}
|
|
|
|
func TestImportDoesNotDuplicateEventsWhenSwitchingFullCacheModes(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
channelID := "111111111111111121"
|
|
guildID := "999999999999999996"
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), []byte(fmt.Sprintf(`https://discord.com/channels/%s/%s
|
|
{"id":"%s","guild_id":"%s","type":0,"name":"mode-switch"}
|
|
{"id":"333333333333333346","channel_id":"%s","content":"mode switch event once","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, guildID, channelID, channelID, guildID, channelID)), 0o600))
|
|
|
|
t.Run("full then default", func(t *testing.T) {
|
|
st, err := store.Open(ctx, filepath.Join(dir, "full-first.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir, FullCache: true})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Messages)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.FilesUnchanged)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
})
|
|
|
|
t.Run("default then full", func(t *testing.T) {
|
|
st, err := store.Open(ctx, filepath.Join(dir, "default-first.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.Messages)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
|
|
stats, err = Import(ctx, st, Options{Path: dir, FullCache: true})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.FilesUnchanged)
|
|
requireMessageCount(t, ctx, st, "message_events", 1)
|
|
})
|
|
}
|
|
|
|
func TestImportFastCachePreservesKnownChannelMetadataAcrossBatches(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
leveldbPath := filepath.Join(dir, "Local Storage", "leveldb")
|
|
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
|
require.NoError(t, os.MkdirAll(leveldbPath, 0o755))
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
|
|
channelID := "111111111111111121"
|
|
guildID := "999999999999999996"
|
|
require.NoError(t, os.WriteFile(filepath.Join(leveldbPath, "000001.log"), []byte(fmt.Sprintf(
|
|
`{"id":"%s","guild_id":"%s","type":11,"name":"known-thread","thread_metadata":{"archived":false}}`,
|
|
channelID,
|
|
guildID,
|
|
)), 0o600))
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(fmt.Sprintf(`https://discord.com/channels/%s/%s
|
|
{"id":"333333333333333346","channel_id":"%s","content":"thread metadata cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`, guildID, channelID, channelID)), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 1, stats.Messages)
|
|
|
|
channels, err := st.Channels(ctx, guildID)
|
|
require.NoError(t, err)
|
|
require.Len(t, channels, 1)
|
|
require.Equal(t, "known-thread", channels[0].Name)
|
|
require.Equal(t, "thread_public", channels[0].Kind)
|
|
|
|
_, rows, err := st.ReadOnlyQuery(ctx, "select raw_json from channels where id = '111111111111111121'")
|
|
require.NoError(t, err)
|
|
require.Len(t, rows, 1)
|
|
require.Contains(t, rows[0][0], `"type":11`)
|
|
}
|
|
|
|
func TestImportFastCacheRouteFiltersServiceWorkerCacheStorage(t *testing.T) {
|
|
ctx := context.Background()
|
|
dir := t.TempDir()
|
|
cachePath := filepath.Join(dir, "Service Worker", "CacheStorage", "cache-id")
|
|
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
|
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "unrouted"), []byte(`
|
|
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"service-worker-cache"}
|
|
{"id":"333333333333333346","channel_id":"111111111111111121","content":"service worker historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
|
`), 0o600))
|
|
|
|
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
|
require.NoError(t, err)
|
|
defer func() { _ = st.Close() }()
|
|
|
|
stats, err := Import(ctx, st, Options{Path: dir})
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, stats.FilesScanned)
|
|
require.Equal(t, 1, stats.CacheFilesFastSkipped)
|
|
|
|
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "service worker historical", Limit: 10})
|
|
require.NoError(t, err)
|
|
require.Empty(t, results)
|
|
}
|
|
|
|
func requireMessageCount(t *testing.T, ctx context.Context, st *store.Store, table string, expected int) {
|
|
t.Helper()
|
|
_, rows, err := st.ReadOnlyQuery(ctx, fmt.Sprintf("select count(*) from %s", table))
|
|
require.NoError(t, err)
|
|
require.Len(t, rows, 1)
|
|
require.Len(t, rows[0], 1)
|
|
require.Equal(t, fmt.Sprint(expected), rows[0][0])
|
|
}
|