fix(share): filter local-only snapshot imports

This commit is contained in:
Vincent Koc 2026-05-02 14:53:27 -07:00
parent 57ee1835df
commit 208172fe42
No known key found for this signature in database
4 changed files with 59 additions and 1 deletion

2
go.mod
View File

@ -41,7 +41,7 @@ require (
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/vincentkoc/crawlkit v0.3.11
github.com/vincentkoc/crawlkit v0.3.12
golang.org/x/crypto v0.50.0 // indirect
golang.org/x/tools v0.44.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect

2
go.sum
View File

@ -75,6 +75,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vincentkoc/crawlkit v0.3.11 h1:UhhRD6ZWa0j4+x9flWLhNe3fqlNwmP61eDZwdb0AyTU=
github.com/vincentkoc/crawlkit v0.3.11/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0=
github.com/vincentkoc/crawlkit v0.3.12 h1:2hs4DXk6LkI4sdbgnFU+mUNaC2gmhQfkMx5C+bbDzJE=
github.com/vincentkoc/crawlkit v0.3.12/go.mod h1:tSSR6CmUqKmfoxzxxRJGARm95sH+Acu63nhzrXkpXo0=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=

View File

@ -174,6 +174,9 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error)
DB: s.DB(),
RootDir: opts.RepoPath,
DeleteTables: SnapshotTables,
Filter: func(table string, row map[string]any) (bool, error) {
return !isDirectMessageSnapshotRow(table, row), nil
},
BeforeImport: func(ctx context.Context, tx *sql.Tx) error {
for _, table := range []string{"message_fts", "member_fts"} {
if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil {

View File

@ -184,6 +184,26 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID)
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content")
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import")
manifest = appendSnapshotRow(t, repo, manifest, "messages", map[string]any{
"id": "hostile-dm",
"guild_id": directMessageGuildID,
"channel_id": "dm-c2",
"author_id": "u9",
"message_type": 0,
"created_at": "2026-04-24T16:00:00Z",
"content": "hostile imported dm",
"normalized_content": "hostile imported dm",
"pinned": 0,
"has_attachments": 0,
"raw_json": `{}`,
"updated_at": "2026-04-24T16:00:00Z",
})
manifest = appendSnapshotRow(t, repo, manifest, "sync_state", map[string]any{
"scope": "wiretap:hostile",
"cursor": "private",
"updated_at": "2026-04-24T16:00:00Z",
})
writeShareManifest(t, repo, manifest)
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
require.NoError(t, err)
@ -202,6 +222,12 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import")
require.NoError(t, err)
require.Equal(t, "2026-04-24T15:33:17Z", wiretapState)
hostileResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "hostile imported dm", Limit: 10})
require.NoError(t, err)
require.Empty(t, hostileResults)
_, rows, err := dst.ReadOnlyQuery(ctx, "select count(*) from sync_state where scope = 'wiretap:hostile'")
require.NoError(t, err)
require.Equal(t, "0", rows[0][0])
}
func TestExportImportEmbeddingsOptIn(t *testing.T) {
@ -800,6 +826,33 @@ func writeGzipJSONLines(t *testing.T, path string, lines []string) {
require.NoError(t, file.Close())
}
// appendSnapshotRow adds a single extra row to the snapshot stored under repo.
//
// It looks up the manifest entry whose Name equals tableName, serializes row
// as JSON into a new one-line gzip JSONL file at
// tables/<tableName>/hostile-<N>.jsonl.gz (N is the entry's current file
// count), records the slash-separated relative path on that entry, increments
// its row count, and returns the manifest. The manifest file on disk is not
// rewritten here. If no entry matches tableName the test is failed via
// t.Fatalf.
func appendSnapshotRow(t *testing.T, repo string, manifest Manifest, tableName string, row map[string]any) Manifest {
	t.Helper()

	// Locate the first table entry with the requested name.
	idx := -1
	for i := range manifest.Tables {
		if manifest.Tables[i].Name == tableName {
			idx = i
			break
		}
	}
	if idx < 0 {
		t.Fatalf("table %s not found", tableName)
		return manifest
	}

	// The file name is derived from the number of files already listed so
	// repeated calls for the same table do not collide.
	fileName := "hostile-" + strconv.Itoa(len(manifest.Tables[idx].Files)) + ".jsonl.gz"
	rel := filepath.ToSlash(filepath.Join("tables", tableName, fileName))
	full := filepath.Join(repo, filepath.FromSlash(rel))
	require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))

	encoded, err := json.Marshal(row)
	require.NoError(t, err)
	writeGzipJSONLines(t, full, []string{string(encoded)})

	manifest.Tables[idx].Files = append(manifest.Tables[idx].Files, rel)
	manifest.Tables[idx].Rows++
	return manifest
}
// writeShareManifest encodes manifest as indented JSON, appends a trailing
// newline, and writes the result to the ManifestName file inside repo with
// permission bits 0600. Any encoding or write error fails the test.
func writeShareManifest(t *testing.T, repo string, manifest Manifest) {
	t.Helper()

	encoded, err := json.MarshalIndent(manifest, "", " ")
	require.NoError(t, err)
	encoded = append(encoded, '\n')

	target := filepath.Join(repo, ManifestName)
	require.NoError(t, os.WriteFile(target, encoded, 0o600))
}
func snapshotTableText(t *testing.T, repo string, table TableManifest) string {
t.Helper()
return snapshotFilesText(t, repo, table.Files)