From e918bf494a5225454b3e779d883c4e18a78dc18f Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Sat, 2 May 2026 14:53:27 -0700 Subject: [PATCH] fix(share): filter local-only snapshot imports --- go.mod | 2 +- go.sum | 2 ++ internal/share/share.go | 3 ++ internal/share/share_test.go | 53 ++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index b04cfc0..f44cab7 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( github.com/ncruces/go-strftime v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - github.com/vincentkoc/crawlkit v0.3.11 + github.com/vincentkoc/crawlkit v0.3.12 golang.org/x/crypto v0.50.0 // indirect golang.org/x/tools v0.44.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/go.sum b/go.sum index e7ab4b1..7e0599b 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/vincentkoc/crawlkit v0.3.11 h1:UhhRD6ZWa0j4+x9flWLhNe3fqlNwmP61eDZwdb0AyTU= github.com/vincentkoc/crawlkit v0.3.11/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0= +github.com/vincentkoc/crawlkit v0.3.12 h1:2hs4DXk6LkI4sdbgnFU+mUNaC2gmhQfkMx5C+bbDzJE= +github.com/vincentkoc/crawlkit v0.3.12/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs= diff --git a/internal/share/share.go b/internal/share/share.go index 4918777..5e83837 100644 --- a/internal/share/share.go +++ b/internal/share/share.go @@ -174,6 +174,9 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error) DB: s.DB(), RootDir: opts.RepoPath, DeleteTables: SnapshotTables, + Filter: func(table string, row map[string]any) (bool, error) { + return !isDirectMessageSnapshotRow(table, row), nil + }, BeforeImport: func(ctx context.Context, tx *sql.Tx) error { for _, table := range []string{"message_fts", "member_fts"} { if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil { diff --git a/internal/share/share_test.go b/internal/share/share_test.go index 121cce0..d0995b2 100644 --- a/internal/share/share_test.go +++ b/internal/share/share_test.go @@ -184,6 +184,26 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) { require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID) require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content") require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import") + manifest = appendSnapshotRow(t, repo, manifest, "messages", map[string]any{ + "id": "hostile-dm", + "guild_id": directMessageGuildID, + "channel_id": "dm-c2", + "author_id": "u9", + "message_type": 0, + "created_at": "2026-04-24T16:00:00Z", + "content": "hostile imported dm", + "normalized_content": "hostile imported dm", + "pinned": 0, + "has_attachments": 0, + "raw_json": `{}`, + "updated_at": "2026-04-24T16:00:00Z", + }) + manifest = appendSnapshotRow(t, repo, manifest, "sync_state", map[string]any{ + "scope": "wiretap:hostile", + "cursor": "private", + "updated_at": "2026-04-24T16:00:00Z", + }) + writeShareManifest(t, repo, manifest) dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db")) require.NoError(t, err) @@ -202,6 +222,12 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) { wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import") require.NoError(t, err) require.Equal(t, "2026-04-24T15:33:17Z", wiretapState) + hostileResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "hostile imported dm", Limit: 10}) + require.NoError(t, err) + require.Empty(t, hostileResults) + _, rows, err := dst.ReadOnlyQuery(ctx, "select count(*) from sync_state where scope = 'wiretap:hostile'") + require.NoError(t, err) + require.Equal(t, "0", rows[0][0]) } func TestExportImportEmbeddingsOptIn(t *testing.T) { @@ -800,6 +826,33 @@ func writeGzipJSONLines(t *testing.T, path string, lines []string) { require.NoError(t, file.Close()) } +func appendSnapshotRow(t *testing.T, repo string, manifest Manifest, tableName string, row map[string]any) Manifest { + t.Helper() + for i := range manifest.Tables { + if manifest.Tables[i].Name != tableName { + continue + } + rel := filepath.ToSlash(filepath.Join("tables", tableName, "hostile-"+strconv.Itoa(len(manifest.Tables[i].Files))+".jsonl.gz")) + full := filepath.Join(repo, filepath.FromSlash(rel)) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + body, err := json.Marshal(row) + require.NoError(t, err) + writeGzipJSONLines(t, full, []string{string(body)}) + manifest.Tables[i].Files = append(manifest.Tables[i].Files, rel) + manifest.Tables[i].Rows++ + return manifest + } + t.Fatalf("table %s not found", tableName) + return manifest +} + +func writeShareManifest(t *testing.T, repo string, manifest Manifest) { + t.Helper() + body, err := json.MarshalIndent(manifest, "", " ") + require.NoError(t, err) + require.NoError(t, os.WriteFile(filepath.Join(repo, ManifestName), append(body, '\n'), 0o600)) +} + func snapshotTableText(t *testing.T, repo string, table TableManifest) string { t.Helper() return snapshotFilesText(t, repo, table.Files)