test: cover git-backed archive sync

This commit is contained in:
Peter Steinberger 2026-04-21 05:46:08 +01:00
parent 7825ca0edc
commit 99b68c546e
No known key found for this signature in database
5 changed files with 284 additions and 2 deletions

View File

@ -348,6 +348,14 @@ discrawl subscribe --no-auto-update https://github.com/openclaw/discord-backup.g
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). `discrawl update` forces the same pull/import step manually.
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync` and `discrawl messages --sync` import the Git snapshot first, then use live Discord only to fill anything newer or missing. This keeps day-to-day sync fast while preserving live repair behavior.
The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`:
```bash
DISCRAWL_DOCKER_TEST=1 go test ./internal/cli -run TestDockerGitSourceSmoke -count=1
```
### `report`
Generates the Markdown activity block used by the shared backup repo README.

View File

@ -120,13 +120,13 @@ func (r *runtime) dispatch(rest []string) error {
case "init":
return r.runInit(rest[1:])
case "sync":
return r.withServices(true, func() error { return r.runSync(rest[1:]) })
return r.withServicesAuto(true, true, func() error { return r.runSync(rest[1:]) })
case "tail":
return r.withServices(true, func() error { return r.runTail(rest[1:]) })
case "search":
return r.withServices(false, func() error { return r.runSearch(rest[1:]) })
case "messages":
return r.withServices(hasBoolFlag(rest[1:], "--sync"), func() error { return r.runMessages(rest[1:]) })
return r.withServicesAuto(hasBoolFlag(rest[1:], "--sync"), true, func() error { return r.runMessages(rest[1:]) })
case "mentions":
return r.withServices(false, func() error { return r.runMentions(rest[1:]) })
case "sql":

View File

@ -287,6 +287,113 @@ func TestSubscribeGitOnlyModeNeedsNoDiscordCredentials(t *testing.T) {
require.Contains(t, err.Error(), "discord token disabled")
}
// TestShareUpdateImportsNewRemoteSnapshot pushes two successive snapshots to a
// bare Git remote and checks that running `update` on a reader whose Discord
// token source is "none" makes each snapshot searchable.
func TestShareUpdateImportsNewRemoteSnapshot(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()

	// Bare repository that plays the role of the shared remote.
	remote := filepath.Join(root, "remote.git")
	runGit(t, root, "init", "--bare", remote)

	// Publisher side: a seeded store that is exported and pushed to the remote.
	pub := seedCLIStore(t, filepath.Join(root, "publisher.db"))
	defer func() { _ = pub.Close() }()
	shareOpts := share.Options{
		RepoPath: filepath.Join(root, "publisher-share"),
		Remote:   remote,
		Branch:   "main",
	}
	publishSnapshot(t, ctx, pub, shareOpts, "test: old snapshot")

	// Reader side: configured with the Git share only.
	cfg := config.Default()
	cfg.DBPath = filepath.Join(root, "reader.db")
	cfg.Share.Remote = remote
	cfg.Share.RepoPath = filepath.Join(root, "reader-share")
	cfg.Share.AutoUpdate = true
	cfg.Share.StaleAfter = "15m"
	cfg.Discord.TokenSource = "none"
	cfgPath := filepath.Join(root, "reader.toml")
	require.NoError(t, config.Write(cfgPath, cfg))

	// run executes one CLI invocation against the reader config, leaving only
	// that invocation's stdout in the buffer.
	var stdout bytes.Buffer
	run := func(args ...string) {
		t.Helper()
		stdout.Reset()
		argv := append([]string{"--config", cfgPath}, args...)
		require.NoError(t, Run(ctx, argv, &stdout, &bytes.Buffer{}))
	}

	// First snapshot: the message is expected to come from seedCLIStore.
	run("update")
	run("search", "automatic")
	require.Contains(t, stdout.String(), "automatic updates work")

	// Add a message on the publisher and push a second snapshot.
	newer := store.MessageRecord{
		ID:                "m200",
		GuildID:           "g1",
		ChannelID:         "c1",
		ChannelName:       "general",
		AuthorID:          "u1",
		AuthorName:        "Peter",
		MessageType:       0,
		CreatedAt:         time.Now().UTC().Add(time.Minute).Format(time.RFC3339Nano),
		Content:           "newer git snapshot arrived",
		NormalizedContent: "newer git snapshot arrived",
		RawJSON:           `{}`,
	}
	require.NoError(t, pub.UpsertMessage(ctx, newer))
	publishSnapshot(t, ctx, pub, shareOpts, "test: new snapshot")

	// A second explicit update must pick up the new message.
	run("update")
	run("search", "newer snapshot")
	require.Contains(t, stdout.String(), "newer git snapshot arrived")
}
// TestSyncImportsGitShareBeforeLiveDiscord dispatches `sync --all` with a Git
// share configured and a stubbed sync service, then asserts that the stub saw
// the Git-published message already in the store when it ran and that both the
// Git message and the stub's "live" message end up in the reader database.
func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()

	// Bare remote plus one published snapshot from a seeded store.
	remote := filepath.Join(root, "remote.git")
	runGit(t, root, "init", "--bare", remote)
	pub := seedCLIStore(t, filepath.Join(root, "publisher.db"))
	defer func() { _ = pub.Close() }()
	publishSnapshot(t, ctx, pub, share.Options{
		RepoPath: filepath.Join(root, "publisher-share"),
		Remote:   remote,
		Branch:   "main",
	}, "test: git snapshot")

	// Reader config pointing at the remote.
	readerCfg := config.Default()
	readerCfg.DBPath = filepath.Join(root, "reader.db")
	readerCfg.DefaultGuildID = "g1"
	readerCfg.Share.Remote = remote
	readerCfg.Share.RepoPath = filepath.Join(root, "reader-share")
	readerCfg.Share.AutoUpdate = true
	readerCfg.Share.StaleAfter = "15m"
	readerCfgPath := filepath.Join(root, "config.toml")
	require.NoError(t, config.Write(readerCfgPath, readerCfg))

	// Stub factories: a fake Discord client and a sync service that captures
	// the store it is handed so it can inspect it during Sync.
	svc := &hybridSyncService{}
	discordFactory := func(config.Config) (discordClient, error) {
		return &fakeDiscordClient{guilds: []*discordgo.UserGuild{{ID: "g1"}}, self: &discordgo.User{ID: "bot"}}, nil
	}
	syncerFactory := func(_ syncer.Client, s *store.Store, _ *slog.Logger) syncService {
		svc.store = s
		return svc
	}
	rt := &runtime{
		ctx:        ctx,
		configPath: readerCfgPath,
		stdout:     &bytes.Buffer{},
		stderr:     &bytes.Buffer{},
		logger:     discardLogger(),
		openStore:  store.Open,
		newDiscord: discordFactory,
		newSyncer:  syncerFactory,
	}

	require.NoError(t, rt.dispatch([]string{"sync", "--all"}))
	require.True(t, svc.sawGitMessage)

	// Re-open the reader database and collect the contents of #general.
	db, err := store.Open(ctx, readerCfg.DBPath)
	require.NoError(t, err)
	defer func() { _ = db.Close() }()
	msgs, err := db.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
	require.NoError(t, err)
	contents := make([]string, 0, len(msgs))
	for _, msg := range msgs {
		contents = append(contents, msg.Content)
	}
	for _, want := range []string{"automatic updates work", "live discord filled the delta"} {
		require.Contains(t, contents, want)
	}
}
func seedCLIStore(t *testing.T, path string) *store.Store {
t.Helper()
ctx := context.Background()
@ -311,6 +418,18 @@ func seedCLIStore(t *testing.T, path string) *store.Store {
return s
}
// publishSnapshot exports s via share.Export, sets a Git author identity in
// opts.RepoPath, commits with message, and pushes. The test fails if any step
// returns an error or if share.Commit reports that it did not commit.
func publishSnapshot(t *testing.T, ctx context.Context, s *store.Store, opts share.Options, message string) {
	t.Helper()

	_, exportErr := share.Export(ctx, s, opts)
	require.NoError(t, exportErr)

	// Author identity is set per repository before committing.
	identity := [][2]string{
		{"user.name", "discrawl test"},
		{"user.email", "discrawl@example.com"},
	}
	for _, kv := range identity {
		runGit(t, opts.RepoPath, "config", kv[0], kv[1])
	}

	didCommit, commitErr := share.Commit(ctx, opts, message)
	require.NoError(t, commitErr)
	require.True(t, didCommit)

	pushErr := share.Push(ctx, opts)
	require.NoError(t, pushErr)
}
func runGit(t *testing.T, dir string, args ...string) {
t.Helper()
// #nosec G204 -- fixed git argv in test setup.
@ -397,6 +516,54 @@ func (f *fakeSyncService) SetAttachmentTextEnabled(enabled bool) {
f.attachmentTextEnabled = enabled
}
// hybridSyncService is a test double for the sync service used by
// TestSyncImportsGitShareBeforeLiveDiscord. Its Sync method inspects the store
// before writing a "live" message, so the test can tell whether the Git
// snapshot had already been imported at that point.
type hybridSyncService struct {
// store is the store handed to the newSyncer factory; Sync reads and writes it.
store *store.Store
// sawGitMessage is set by Sync when a "general" message with content
// "automatic updates work" is already present.
sawGitMessage bool
}
// DiscoverGuilds always reports a single guild with ID "g1" and no error.
func (f *hybridSyncService) DiscoverGuilds(context.Context) ([]*discordgo.UserGuild, error) {
	only := &discordgo.UserGuild{ID: "g1"}
	return []*discordgo.UserGuild{only}, nil
}
// Sync stands in for a live Discord sync. It first records in sawGitMessage
// whether a "general" message with content "automatic updates work" is already
// in the store, then upserts guild "g1", channel "c1", and a message "m-live"
// dated one minute in the future. On success it reports one message and as many
// guilds as opts.GuildIDs contains; any store error is returned unchanged with
// zero stats.
func (f *hybridSyncService) Sync(ctx context.Context, opts syncer.SyncOptions) (syncer.SyncStats, error) {
	var none syncer.SyncStats

	existing, err := f.store.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
	if err != nil {
		return none, err
	}
	for _, msg := range existing {
		if msg.Content != "automatic updates work" {
			continue
		}
		f.sawGitMessage = true
		break
	}

	guild := store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}
	if err = f.store.UpsertGuild(ctx, guild); err != nil {
		return none, err
	}

	channel := store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}
	if err = f.store.UpsertChannel(ctx, channel); err != nil {
		return none, err
	}

	// The "live" message that the test expects to find next to the Git one.
	live := store.MessageRecord{
		ID:                "m-live",
		GuildID:           "g1",
		ChannelID:         "c1",
		ChannelName:       "general",
		AuthorID:          "u1",
		AuthorName:        "Peter",
		MessageType:       0,
		CreatedAt:         time.Now().UTC().Add(time.Minute).Format(time.RFC3339Nano),
		Content:           "live discord filled the delta",
		NormalizedContent: "live discord filled the delta",
		RawJSON:           `{}`,
	}
	if err = f.store.UpsertMessage(ctx, live); err != nil {
		return none, err
	}

	return syncer.SyncStats{Guilds: len(opts.GuildIDs), Messages: 1}, nil
}
// RunTail is a no-op that ignores its arguments and always returns nil.
// NOTE(review): presumably present only to satisfy the syncService interface — confirm.
func (f *hybridSyncService) RunTail(context.Context, []string, time.Duration) error {
return nil
}
func TestRuntimeInitSyncTailAndDoctor(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()

View File

@ -0,0 +1,33 @@
package cli
import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestDockerGitSourceSmoke runs scripts/docker-git-source-smoke.sh from the
// repository root. It is opt-in: the test is skipped unless
// DISCRAWL_DOCKER_TEST=1 is set and a docker binary is on PATH. On failure the
// script's combined output is attached to the assertion message.
func TestDockerGitSourceSmoke(t *testing.T) {
	if optIn := os.Getenv("DISCRAWL_DOCKER_TEST"); optIn != "1" {
		t.Skip("set DISCRAWL_DOCKER_TEST=1 to run the Docker git-source smoke test")
	}
	_, lookErr := exec.LookPath("docker")
	if lookErr != nil {
		t.Skip("docker is not installed")
	}

	top := repoRoot(t)
	script := filepath.Join(top, "scripts", "docker-git-source-smoke.sh")
	smoke := exec.Command("bash", script)
	smoke.Dir = top

	output, runErr := smoke.CombinedOutput()
	require.NoError(t, runErr, string(output))
}
// repoRoot returns the output of `git rev-parse --show-toplevel`, run in the
// test's working directory, with surrounding whitespace trimmed. The test fails
// if the git command returns an error.
func repoRoot(t *testing.T) string {
	t.Helper()
	raw, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	require.NoError(t, err)
	top := strings.TrimSpace(string(raw))
	return top
}

View File

@ -0,0 +1,74 @@
#!/usr/bin/env bash
# Docker smoke test for the Git snapshot source: builds a small snapshot
# fixture in a temporary Git repository on the host, then runs discrawl against
# it inside a container.
set -euo pipefail
# Repository root, resolved via git from this script's parent directory.
repo_root="$(git -C "$(dirname "${BASH_SOURCE[0]}")/.." rev-parse --show-toplevel)"
# Container image to run in; override with DISCRAWL_DOCKER_IMAGE.
image="${DISCRAWL_DOCKER_IMAGE:-golang:1.26.2-bookworm}"
# Scratch directory holding the fixture; removed when the script exits.
tmp="$(mktemp -d /tmp/discrawl-docker-smoke.XXXXXX)"
cleanup() {
rm -rf "$tmp"
}
trap cleanup EXIT
# Fixture layout: one directory per table under tables/.
fixture="$tmp/backup"
mkdir -p "$fixture/tables/guilds" "$fixture/tables/channels" "$fixture/tables/members" "$fixture/tables/messages"
# write_gz REL BODY
# Writes BODY followed by a newline, gzip-compressed, to $fixture/REL.
# gzip -n leaves the original name and timestamp out of the gzip header.
write_gz() {
  local target="$fixture/$1"
  local payload="$2"
  printf '%s\n' "$payload" | gzip -nc > "$target"
}
# Fixed timestamp used for every row and for the manifest.
now="2026-04-21T12:00:00Z"
# One guild row.
write_gz "tables/guilds/000000.jsonl.gz" \
"{\"id\":\"g1\",\"name\":\"Docker Guild\",\"icon\":null,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}"
# One text channel named "general" in that guild.
write_gz "tables/channels/000000.jsonl.gz" \
"{\"id\":\"c1\",\"guild_id\":\"g1\",\"parent_id\":null,\"kind\":\"text\",\"name\":\"general\",\"topic\":null,\"position\":0,\"is_nsfw\":0,\"is_archived\":0,\"is_locked\":0,\"is_private_thread\":0,\"thread_parent_id\":null,\"archive_timestamp\":null,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}"
# One member row for user u1.
write_gz "tables/members/000000.jsonl.gz" \
"{\"guild_id\":\"g1\",\"user_id\":\"u1\",\"username\":\"peter\",\"global_name\":null,\"display_name\":\"Docker Peter\",\"nick\":null,\"discriminator\":null,\"avatar\":null,\"bot\":0,\"joined_at\":null,\"role_ids_json\":\"[]\",\"raw_json\":\"{}\",\"updated_at\":\"$now\"}"
# One message; its content is what the container-side greps look for.
write_gz "tables/messages/000000.jsonl.gz" \
"{\"id\":\"m1\",\"guild_id\":\"g1\",\"channel_id\":\"c1\",\"author_id\":\"u1\",\"message_type\":0,\"created_at\":\"$now\",\"edited_at\":null,\"deleted_at\":null,\"content\":\"docker smoke archive is queryable\",\"normalized_content\":\"docker smoke archive is queryable\",\"reply_to_message_id\":null,\"pinned\":0,\"has_attachments\":0,\"raw_json\":\"{}\",\"updated_at\":\"$now\"}"
# Manifest listing each table's file, column order, and row count.
# The heredoc delimiter is unquoted so $now is expanded.
cat > "$fixture/manifest.json" <<JSON
{
"version": 1,
"generated_at": "$now",
"tables": [
{"name": "guilds", "files": ["tables/guilds/000000.jsonl.gz"], "columns": ["id", "name", "icon", "raw_json", "updated_at"], "rows": 1},
{"name": "channels", "files": ["tables/channels/000000.jsonl.gz"], "columns": ["id", "guild_id", "parent_id", "kind", "name", "topic", "position", "is_nsfw", "is_archived", "is_locked", "is_private_thread", "thread_parent_id", "archive_timestamp", "raw_json", "updated_at"], "rows": 1},
{"name": "members", "files": ["tables/members/000000.jsonl.gz"], "columns": ["guild_id", "user_id", "username", "global_name", "display_name", "nick", "discriminator", "avatar", "bot", "joined_at", "role_ids_json", "raw_json", "updated_at"], "rows": 1},
{"name": "messages", "files": ["tables/messages/000000.jsonl.gz"], "columns": ["id", "guild_id", "channel_id", "author_id", "message_type", "created_at", "edited_at", "deleted_at", "content", "normalized_content", "reply_to_message_id", "pinned", "has_attachments", "raw_json", "updated_at"], "rows": 1}
],
"files": {"manifest": "manifest.json"}
}
JSON
# Turn the fixture directory into a Git repository with a single commit on
# branch main, so it can be used as a snapshot remote.
git -C "$fixture" init
git -C "$fixture" checkout -B main
git -C "$fixture" config user.name "discrawl docker smoke"
git -C "$fixture" config user.email "discrawl@example.com"
# Do not inherit commit signing from the host's global Git config: a signing
# requirement without an available key or agent would fail or block the
# fixture commit.
git -C "$fixture" config commit.gpgsign false
git -C "$fixture" add .
git -C "$fixture" commit -m "test: fixture snapshot"
# Run the smoke test in a throwaway container. The source tree is mounted
# read-only at /src and the fixture repository read-only at /backup; all build
# output and discrawl state live under /work inside the container.
docker run --rm \
  --mount "type=bind,source=$repo_root,target=/src,readonly" \
  --mount "type=bind,source=$fixture,target=/backup,readonly" \
  "$image" \
  bash -lc '
    set -euo pipefail
    mkdir -p /work/bin /work/gocache /work/gomodcache
    export PATH=/usr/local/go/bin:$PATH
    export GOBIN=/work/bin GOCACHE=/work/gocache GOMODCACHE=/work/gomodcache
    # The bind mounts keep their host ownership, which can differ from the
    # container user. Git then refuses to operate on them, which would break
    # both Go VCS stamping in /src and the clone of /backup. Trusting every
    # path is acceptable because this container is discarded afterwards.
    git config --global --add safe.directory "*"
    cd /src
    go install ./cmd/discrawl
    discrawl=/work/bin/discrawl
    # -F matches the version literally; unescaped dots would match any character.
    "$discrawl" --version | grep -qF "0.2.0"
    # Subscribe to the fixture repository, then check each read command.
    "$discrawl" --config /work/config.toml subscribe --repo /work/share file:///backup > /work/subscribe.out
    "$discrawl" --config /work/config.toml search "docker smoke archive" | tee /work/search.out
    grep -q "docker smoke archive is queryable" /work/search.out
    "$discrawl" --config /work/config.toml messages --channel general --hours 24 --all | tee /work/messages.out
    grep -q "docker smoke archive is queryable" /work/messages.out
    "$discrawl" --config /work/config.toml --plain sql "select count(*) as total from messages" | tee /work/sql.out
    # Accept a standalone 1 only, not a 1 embedded in a longer number.
    grep -Eq "(^|[^0-9])1([^0-9]|$)" /work/sql.out
    "$discrawl" --config /work/config.toml report | tee /work/report.out
    grep -q "Archive size: 1 messages" /work/report.out
  '