From 99b68c546e15c75edd02ee9a2b0320e72f13dac8 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Tue, 21 Apr 2026 05:46:08 +0100 Subject: [PATCH] test: cover git-backed archive sync --- README.md | 8 ++ internal/cli/cli.go | 4 +- internal/cli/cli_test.go | 167 +++++++++++++++++++++++++++++ internal/cli/docker_smoke_test.go | 33 ++++++ scripts/docker-git-source-smoke.sh | 74 +++++++++++++ 5 files changed, 284 insertions(+), 2 deletions(-) create mode 100644 internal/cli/docker_smoke_test.go create mode 100755 scripts/docker-git-source-smoke.sh diff --git a/README.md b/README.md index cff9b67..fc0410c 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,14 @@ discrawl subscribe --no-auto-update https://github.com/openclaw/discord-backup.g Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). `discrawl update` forces the same pull/import step manually. +Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync` and `discrawl messages --sync` import the Git snapshot first, then use live Discord only to fill anything newer or missing. This keeps day-to-day sync fast while preserving live repair behavior. + +The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`: + +```bash +DISCRAWL_DOCKER_TEST=1 go test ./internal/cli -run TestDockerGitSourceSmoke -count=1 +``` + ### `report` Generates the Markdown activity block used by the shared backup repo README. diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 6be13b3..2051291 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -120,13 +120,13 @@ func (r *runtime) dispatch(rest []string) error { case "init": return r.runInit(rest[1:]) case "sync": - return r.withServices(true, func() error { return r.runSync(rest[1:]) }) + return r.withServicesAuto(true, true, func() error { return r.runSync(rest[1:]) }) case "tail": return r.withServices(true, func() error { return r.runTail(rest[1:]) }) case "search": return r.withServices(false, func() error { return r.runSearch(rest[1:]) }) case "messages": - return r.withServices(hasBoolFlag(rest[1:], "--sync"), func() error { return r.runMessages(rest[1:]) }) + return r.withServicesAuto(hasBoolFlag(rest[1:], "--sync"), true, func() error { return r.runMessages(rest[1:]) }) case "mentions": return r.withServices(false, func() error { return r.runMentions(rest[1:]) }) case "sql": diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index 9611a2e..e044af8 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -287,6 +287,113 @@ func TestSubscribeGitOnlyModeNeedsNoDiscordCredentials(t *testing.T) { require.Contains(t, err.Error(), "discord token disabled") } +func TestShareUpdateImportsNewRemoteSnapshot(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + remoteRepo := filepath.Join(dir, "remote.git") + runGit(t, dir, "init", "--bare", remoteRepo) + + publisherDB := filepath.Join(dir, "publisher.db") + publisher := seedCLIStore(t, publisherDB) + defer func() { _ = publisher.Close() }() + publisherRepo := filepath.Join(dir, "publisher-share") + opts := share.Options{RepoPath: publisherRepo, Remote: remoteRepo, Branch: "main"} + publishSnapshot(t, ctx, publisher, opts, "test: old snapshot") + + readerCfgPath := filepath.Join(dir, "reader.toml") + readerCfg := config.Default() + readerCfg.DBPath = filepath.Join(dir, "reader.db") + readerCfg.Share.Remote = remoteRepo + readerCfg.Share.RepoPath = filepath.Join(dir, "reader-share") + readerCfg.Share.AutoUpdate = true + readerCfg.Share.StaleAfter = "15m" + readerCfg.Discord.TokenSource = "none" + require.NoError(t, config.Write(readerCfgPath, readerCfg)) + + var out bytes.Buffer + require.NoError(t, Run(ctx, []string{"--config", readerCfgPath, "update"}, &out, &bytes.Buffer{})) + out.Reset() + require.NoError(t, Run(ctx, []string{"--config", readerCfgPath, "search", "automatic"}, &out, &bytes.Buffer{})) + require.Contains(t, out.String(), "automatic updates work") + + require.NoError(t, publisher.UpsertMessage(ctx, store.MessageRecord{ + ID: "m200", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "Peter", + MessageType: 0, + CreatedAt: time.Now().UTC().Add(time.Minute).Format(time.RFC3339Nano), + Content: "newer git snapshot arrived", + NormalizedContent: "newer git snapshot arrived", + RawJSON: `{}`, + })) + publishSnapshot(t, ctx, publisher, opts, "test: new snapshot") + + out.Reset() + require.NoError(t, Run(ctx, []string{"--config", readerCfgPath, "update"}, &out, &bytes.Buffer{})) + out.Reset() + require.NoError(t, Run(ctx, []string{"--config", readerCfgPath, "search", "newer snapshot"}, &out, &bytes.Buffer{})) + require.Contains(t, out.String(), "newer git snapshot arrived") +} + +func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + remoteRepo := filepath.Join(dir, "remote.git") + runGit(t, dir, "init", "--bare", remoteRepo) + + publisherDB := filepath.Join(dir, "publisher.db") + publisher := seedCLIStore(t, publisherDB) + defer func() { _ = publisher.Close() }() + publisherRepo := filepath.Join(dir, "publisher-share") + opts := share.Options{RepoPath: publisherRepo, Remote: remoteRepo, Branch: "main"} + publishSnapshot(t, ctx, publisher, opts, "test: git snapshot") + + cfgPath := filepath.Join(dir, "config.toml") + cfg := config.Default() + cfg.DBPath = filepath.Join(dir, "reader.db") + cfg.DefaultGuildID = "g1" + cfg.Share.Remote = remoteRepo + cfg.Share.RepoPath = filepath.Join(dir, "reader-share") + cfg.Share.AutoUpdate = true + cfg.Share.StaleAfter = "15m" + require.NoError(t, config.Write(cfgPath, cfg)) + + hybrid := &hybridSyncService{} + rt := &runtime{ + ctx: ctx, + configPath: cfgPath, + stdout: &bytes.Buffer{}, + stderr: &bytes.Buffer{}, + logger: discardLogger(), + openStore: store.Open, + newDiscord: func(config.Config) (discordClient, error) { + return &fakeDiscordClient{guilds: []*discordgo.UserGuild{{ID: "g1"}}, self: &discordgo.User{ID: "bot"}}, nil + }, + newSyncer: func(_ syncer.Client, s *store.Store, _ *slog.Logger) syncService { + hybrid.store = s + return hybrid + }, + } + + require.NoError(t, rt.dispatch([]string{"sync", "--all"})) + require.True(t, hybrid.sawGitMessage) + + reader, err := store.Open(ctx, cfg.DBPath) + require.NoError(t, err) + defer func() { _ = reader.Close() }() + rows, err := reader.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true}) + require.NoError(t, err) + contents := make([]string, 0, len(rows)) + for _, row := range rows { + contents = append(contents, row.Content) + } + require.Contains(t, contents, "automatic updates work") + require.Contains(t, contents, "live discord filled the delta") +} + func seedCLIStore(t *testing.T, path string) *store.Store { t.Helper() ctx := context.Background() @@ -311,6 +418,18 @@ func seedCLIStore(t *testing.T, path string) *store.Store { return s } +func publishSnapshot(t *testing.T, ctx context.Context, s *store.Store, opts share.Options, message string) { + t.Helper() + _, err := share.Export(ctx, s, opts) + require.NoError(t, err) + runGit(t, opts.RepoPath, "config", "user.name", "discrawl test") + runGit(t, opts.RepoPath, "config", "user.email", "discrawl@example.com") + committed, err := share.Commit(ctx, opts, message) + require.NoError(t, err) + require.True(t, committed) + require.NoError(t, share.Push(ctx, opts)) +} + func runGit(t *testing.T, dir string, args ...string) { t.Helper() // #nosec G204 -- fixed git argv in test setup. @@ -397,6 +516,54 @@ func (f *fakeSyncService) SetAttachmentTextEnabled(enabled bool) { f.attachmentTextEnabled = enabled } +type hybridSyncService struct { + store *store.Store + sawGitMessage bool +} + +func (f *hybridSyncService) DiscoverGuilds(context.Context) ([]*discordgo.UserGuild, error) { + return []*discordgo.UserGuild{{ID: "g1"}}, nil +} + +func (f *hybridSyncService) Sync(ctx context.Context, opts syncer.SyncOptions) (syncer.SyncStats, error) { + rows, err := f.store.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true}) + if err != nil { + return syncer.SyncStats{}, err + } + for _, row := range rows { + if row.Content == "automatic updates work" { + f.sawGitMessage = true + break + } + } + if err := f.store.UpsertGuild(ctx, store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}); err != nil { + return syncer.SyncStats{}, err + } + if err := f.store.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}); err != nil { + return syncer.SyncStats{}, err + } + if err := f.store.UpsertMessage(ctx, store.MessageRecord{ + ID: "m-live", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + AuthorName: "Peter", + MessageType: 0, + CreatedAt: time.Now().UTC().Add(time.Minute).Format(time.RFC3339Nano), + Content: "live discord filled the delta", + NormalizedContent: "live discord filled the delta", + RawJSON: `{}`, + }); err != nil { + return syncer.SyncStats{}, err + } + return syncer.SyncStats{Guilds: len(opts.GuildIDs), Messages: 1}, nil +} + +func (f *hybridSyncService) RunTail(context.Context, []string, time.Duration) error { + return nil +} + func TestRuntimeInitSyncTailAndDoctor(t *testing.T) { ctx := context.Background() dir := t.TempDir() diff --git a/internal/cli/docker_smoke_test.go b/internal/cli/docker_smoke_test.go new file mode 100644 index 0000000..58f262c --- /dev/null +++ b/internal/cli/docker_smoke_test.go @@ -0,0 +1,33 @@ +package cli + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestDockerGitSourceSmoke(t *testing.T) { + if os.Getenv("DISCRAWL_DOCKER_TEST") != "1" { + t.Skip("set DISCRAWL_DOCKER_TEST=1 to run the Docker git-source smoke test") + } + if _, err := exec.LookPath("docker"); err != nil { + t.Skip("docker is not installed") + } + root := repoRoot(t) + cmd := exec.Command("bash", filepath.Join(root, "scripts", "docker-git-source-smoke.sh")) + cmd.Dir = root + out, err := cmd.CombinedOutput() + require.NoError(t, err, string(out)) +} + +func repoRoot(t *testing.T) string { + t.Helper() + cmd := exec.Command("git", "rev-parse", "--show-toplevel") + out, err := cmd.Output() + require.NoError(t, err) + return strings.TrimSpace(string(out)) +} diff --git a/scripts/docker-git-source-smoke.sh b/scripts/docker-git-source-smoke.sh new file mode 100755 index 0000000..20185f9 --- /dev/null +++ b/scripts/docker-git-source-smoke.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +set -euo pipefail + +repo_root="$(git -C "$(dirname "${BASH_SOURCE[0]}")/.." rev-parse --show-toplevel)" +image="${DISCRAWL_DOCKER_IMAGE:-golang:1.26.2-bookworm}" +tmp="$(mktemp -d /tmp/discrawl-docker-smoke.XXXXXX)" +cleanup() { + rm -rf "$tmp" +} +trap cleanup EXIT + +fixture="$tmp/backup" +mkdir -p "$fixture/tables/guilds" "$fixture/tables/channels" "$fixture/tables/members" "$fixture/tables/messages" + +write_gz() { + local rel="$1" + local body="$2" + printf '%s\n' "$body" | gzip -n -c > "$fixture/$rel" +} + +now="2026-04-21T12:00:00Z" +write_gz "tables/guilds/000000.jsonl.gz" \ + "{\"id\":\"g1\",\"name\":\"Docker Guild\",\"icon\":null,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}" +write_gz "tables/channels/000000.jsonl.gz" \ + "{\"id\":\"c1\",\"guild_id\":\"g1\",\"parent_id\":null,\"kind\":\"text\",\"name\":\"general\",\"topic\":null,\"position\":0,\"is_nsfw\":0,\"is_archived\":0,\"is_locked\":0,\"is_private_thread\":0,\"thread_parent_id\":null,\"archive_timestamp\":null,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}" +write_gz "tables/members/000000.jsonl.gz" \ + "{\"guild_id\":\"g1\",\"user_id\":\"u1\",\"username\":\"peter\",\"global_name\":null,\"display_name\":\"Docker Peter\",\"nick\":null,\"discriminator\":null,\"avatar\":null,\"bot\":0,\"joined_at\":null,\"role_ids_json\":\"[]\",\"raw_json\":\"{}\",\"updated_at\":\"$now\"}" +write_gz "tables/messages/000000.jsonl.gz" \ + "{\"id\":\"m1\",\"guild_id\":\"g1\",\"channel_id\":\"c1\",\"author_id\":\"u1\",\"message_type\":0,\"created_at\":\"$now\",\"edited_at\":null,\"deleted_at\":null,\"content\":\"docker smoke archive is queryable\",\"normalized_content\":\"docker smoke archive is queryable\",\"reply_to_message_id\":null,\"pinned\":0,\"has_attachments\":0,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}" + +cat > "$fixture/manifest.json" < /work/subscribe.out + "$discrawl" --config /work/config.toml search "docker smoke archive" | tee /work/search.out + grep -q "docker smoke archive is queryable" /work/search.out + "$discrawl" --config /work/config.toml messages --channel general --hours 24 --all | tee /work/messages.out + grep -q "docker smoke archive is queryable" /work/messages.out + "$discrawl" --config /work/config.toml --plain sql "select count(*) as total from messages" | tee /work/sql.out + grep -Eq "(^|[^0-9])1([^0-9]|$)" /work/sql.out + "$discrawl" --config /work/config.toml report | tee /work/report.out + grep -q "Archive size: 1 messages" /work/report.out + '