fix: remove unreleased twitter import
This commit is contained in:
parent
4f6d70830d
commit
37a595bd7d
10
CHANGELOG.md
10
CHANGELOG.md
@ -4,16 +4,6 @@ All notable changes to `discrawl` will be documented in this file.
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Changes
|
||||
|
||||
- Added `twitter import` / `x import` for local X/Twitter archive `.zip` files, storing tweets, likes, and direct messages under synthetic guild id `x` so existing Discrawl search and SQL commands can query them.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Git snapshot imports now preserve local X/Twitter archive rows and `twitter:*` sync state, matching the local-only handling already used for wiretap DMs.
|
||||
|
||||
## 0.6.1 - 2026-04-25
|
||||
|
||||
### Maintenance
|
||||
|
||||
- Refreshed Go module dependencies and CI tool/action pins, including staticcheck, gofumpt, gosec, govulncheck, gitleaks, setup-node, and GoReleaser.
|
||||
|
||||
18
README.md
18
README.md
@ -268,24 +268,6 @@ Notes:
|
||||
- does not extract, store, or print Discord auth tokens
|
||||
- `--max-file-bytes` skips unusually large files; default is 64 MiB
|
||||
|
||||
### `twitter import`
|
||||
|
||||
Imports a local X/Twitter archive `.zip` into the same SQLite message/search tables.
|
||||
|
||||
```bash
|
||||
discrawl twitter import --archive ~/Downloads/twitter-2025-08-05.zip
|
||||
discrawl x import --archive ~/Downloads/twitter-2025-08-05.zip --dry-run
|
||||
discrawl search --guild x "launch checklist"
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- stores imported tweets, likes, and direct messages under synthetic guild id `x`
|
||||
- maps tweets to `x:tweets`, liked tweet text to `x:likes`, and each DM conversation to an `x:dm:*` channel
|
||||
- uses the archive's JS assignment files such as `data/account.js`, `data/tweets*.js`, `data/like.js`, `data/direct-messages.js`, and `data/direct-messages-group.js`
|
||||
- prefixes imported message ids with `x:tweet:`, `x:like:`, or `x:dm:` so they cannot collide with Discord snowflakes
|
||||
- stamps `twitter:last_import` and `twitter:last_archive` in `sync_state`
|
||||
|
||||
### `search`
|
||||
|
||||
Searches archived messages. FTS is the default mode and works without embeddings.
|
||||
|
||||
@ -128,8 +128,6 @@ func (r *runtime) dispatch(rest []string) error {
|
||||
return r.withServices(true, func() error { return r.runTail(rest[1:]) })
|
||||
case "wiretap":
|
||||
return r.withLocalStoreDefault(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
case "twitter", "x":
|
||||
return r.withLocalStoreDefault(false, func() error { return r.runTwitter(rest[1:]) })
|
||||
case "search":
|
||||
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
|
||||
return r.withLocalStoreDefault(autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
@ -183,49 +182,6 @@ func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
}
|
||||
|
||||
func TestTwitterImportCommand(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
archivePath := filepath.Join(dir, "twitter.zip")
|
||||
writeTwitterArchiveFixture(t, archivePath)
|
||||
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = dbPath
|
||||
cfg.Discord.TokenSource = "none"
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "twitter", "import", "--archive", archivePath}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "tweets=1")
|
||||
require.Contains(t, out.String(), "dm_messages=1")
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "--guild", "x", "secret roadmap"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "secret roadmap")
|
||||
}
|
||||
|
||||
func writeTwitterArchiveFixture(t *testing.T, path string) {
|
||||
t.Helper()
|
||||
file, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = file.Close() }()
|
||||
zw := zip.NewWriter(file)
|
||||
defer func() { require.NoError(t, zw.Close()) }()
|
||||
writeTwitterZipEntry(t, zw, "data/account.js", `window.YTD.account.part0 = [{"account":{"username":"steipete","accountId":"25401953","accountDisplayName":"Peter Steinberger"}}]`)
|
||||
writeTwitterZipEntry(t, zw, "data/tweets.js", `window.YTD.tweets.part0 = [{"tweet":{"id_str":"1952542067017584782","created_at":"Tue Aug 05 01:27:59 +0000 2025","full_text":"archive tweet search text","entities":{"user_mentions":[]}}}]`)
|
||||
writeTwitterZipEntry(t, zw, "data/direct-messages.js", `window.YTD.direct_messages.part0 = [{"dmConversation":{"conversationId":"929-25401953","messages":[{"messageCreate":{"recipientId":"929","senderId":"25401953","id":"1052590933307461636","createdAt":"2018-10-17T16:03:29.391Z","text":"secret roadmap","mediaUrls":[]}}]}}]`)
|
||||
}
|
||||
|
||||
func writeTwitterZipEntry(t *testing.T, zw *zip.Writer, name, body string) {
|
||||
t.Helper()
|
||||
w, err := zw.Create(name)
|
||||
require.NoError(t, err)
|
||||
_, err = w.Write([]byte(body))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestParseMessageWindow(t *testing.T) {
|
||||
rt := &runtime{now: func() time.Time {
|
||||
return time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
|
||||
|
||||
@ -13,7 +13,6 @@ import (
|
||||
"github.com/steipete/discrawl/internal/discorddesktop"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/twitterarchive"
|
||||
)
|
||||
|
||||
func (r *runtime) print(value any) error {
|
||||
@ -86,8 +85,6 @@ Commands:
|
||||
sync
|
||||
tail
|
||||
wiretap
|
||||
twitter
|
||||
x
|
||||
search
|
||||
messages
|
||||
dms
|
||||
@ -137,10 +134,6 @@ func printHuman(w io.Writer, value any) error {
|
||||
_, err := fmt.Fprintf(w, "path=%s\nfiles=%d\nskipped=%d\nobjects=%d\nguilds=%d\nchannels=%d\nmessages=%d\ndm_messages=%d\ndm_channels=%d\nguild_messages=%d\nskipped_messages=%d\nskipped_channels=%d\ndry_run=%t\n",
|
||||
v.Path, v.FilesScanned, v.FilesSkipped, v.JSONObjects, v.Guilds, v.Channels, v.Messages, v.DMMessages, v.DMChannels, v.GuildMessages, v.SkippedMessages, v.SkippedChannels, v.DryRun)
|
||||
return err
|
||||
case twitterarchive.Stats:
|
||||
_, err := fmt.Fprintf(w, "path=%s\nfiles=%d\naccounts=%d\ntweets=%d\nlikes=%d\ndm_conversations=%d\ndm_messages=%d\nskipped=%d\ndry_run=%t\n",
|
||||
v.Path, v.FilesScanned, v.Accounts, v.Tweets, v.Likes, v.DMConversations, v.DMMessages, v.Skipped, v.DryRun)
|
||||
return err
|
||||
case store.Status:
|
||||
_, err := fmt.Fprintf(w, "db=%s\nguilds=%d\nchannels=%d\nthreads=%d\nmessages=%d\nmembers=%d\nembedding_backlog=%d\nlast_sync=%s\nlast_tail_event=%s\n",
|
||||
v.DBPath, v.GuildCount, v.ChannelCount, v.ThreadCount, v.MessageCount, v.MemberCount, v.EmbeddingBacklog,
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/steipete/discrawl/internal/twitterarchive"
|
||||
)
|
||||
|
||||
func (r *runtime) runTwitter(args []string) error {
|
||||
if len(args) == 0 {
|
||||
return usageErr(fmt.Errorf("twitter requires subcommand: import"))
|
||||
}
|
||||
switch args[0] {
|
||||
case "import":
|
||||
return r.runTwitterImport(args[1:])
|
||||
default:
|
||||
return usageErr(fmt.Errorf("unknown twitter subcommand %q", args[0]))
|
||||
}
|
||||
}
|
||||
|
||||
func (r *runtime) runTwitterImport(args []string) error {
|
||||
fs := flag.NewFlagSet("twitter import", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
archivePath := fs.String("archive", "", "")
|
||||
fs.StringVar(archivePath, "path", "", "")
|
||||
dryRun := fs.Bool("dry-run", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() > 0 {
|
||||
return usageErr(fmt.Errorf("twitter import takes flags only"))
|
||||
}
|
||||
stats, err := twitterarchive.Import(r.ctx, r.store, twitterarchive.Options{
|
||||
Path: *archivePath,
|
||||
DryRun: *dryRun,
|
||||
Now: r.now,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(stats)
|
||||
}
|
||||
@ -24,7 +24,6 @@ const (
|
||||
LastImportSyncScope = "share:last_import_at"
|
||||
LastImportManifestSyncScope = "share:last_import_manifest_generated_at"
|
||||
directMessageGuildID = "@me"
|
||||
twitterArchiveGuildID = "x"
|
||||
)
|
||||
|
||||
var ErrNoManifest = errors.New("share manifest not found")
|
||||
@ -635,11 +634,11 @@ func importColumns(table TableManifest) []string {
|
||||
func snapshotExportQuery(table string) (string, []any) {
|
||||
switch table {
|
||||
case "guilds":
|
||||
return "select * from guilds where id not in (?, ?)", []any{directMessageGuildID, twitterArchiveGuildID}
|
||||
return "select * from guilds where id != ?", []any{directMessageGuildID}
|
||||
case "channels", "members", "messages", "message_events", "message_attachments", "mention_events":
|
||||
return "select * from " + table + " where guild_id not in (?, ?)", []any{directMessageGuildID, twitterArchiveGuildID}
|
||||
return "select * from " + table + " where guild_id != ?", []any{directMessageGuildID}
|
||||
case "sync_state":
|
||||
return "select * from sync_state where scope not like 'wiretap:%' and scope not like 'twitter:%'", nil
|
||||
return "select * from sync_state where scope not like 'wiretap:%'", nil
|
||||
default:
|
||||
return "select * from " + table, nil
|
||||
}
|
||||
@ -648,13 +647,13 @@ func snapshotExportQuery(table string) (string, []any) {
|
||||
func snapshotDeleteQuery(table string) (string, []any) {
|
||||
switch table {
|
||||
case "guilds":
|
||||
return "delete from guilds where id not in (?, ?)", []any{directMessageGuildID, twitterArchiveGuildID}
|
||||
return "delete from guilds where id != ?", []any{directMessageGuildID}
|
||||
case "message_events", "mention_events":
|
||||
return "delete from " + table + " where guild_id not in (?, ?)", []any{directMessageGuildID, twitterArchiveGuildID}
|
||||
return "delete from " + table + " where guild_id != ?", []any{directMessageGuildID}
|
||||
case "channels", "members", "messages", "message_attachments":
|
||||
return "delete from " + table + " where guild_id not in (?, ?)", []any{directMessageGuildID, twitterArchiveGuildID}
|
||||
return "delete from " + table + " where guild_id != ?", []any{directMessageGuildID}
|
||||
case "sync_state":
|
||||
return "delete from sync_state where scope not like 'wiretap:%' and scope not like 'twitter:%'", nil
|
||||
return "delete from sync_state where scope not like 'wiretap:%'", nil
|
||||
default:
|
||||
return "delete from " + table, nil
|
||||
}
|
||||
@ -668,14 +667,14 @@ func isDirectMessageSnapshotRow(table string, row map[string]any) bool {
|
||||
return isLocalOnlyGuildID(stringValue(row["guild_id"]))
|
||||
case "sync_state":
|
||||
scope := stringValue(row["scope"])
|
||||
return strings.HasPrefix(scope, "wiretap:") || strings.HasPrefix(scope, "twitter:")
|
||||
return strings.HasPrefix(scope, "wiretap:")
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isLocalOnlyGuildID(guildID string) bool {
|
||||
return guildID == directMessageGuildID || guildID == twitterArchiveGuildID
|
||||
return guildID == directMessageGuildID
|
||||
}
|
||||
|
||||
func importEmbeddings(ctx context.Context, tx *sql.Tx, opts Options, manifests []EmbeddingManifest) error {
|
||||
|
||||
@ -110,7 +110,6 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
|
||||
src := seedStore(t, filepath.Join(t.TempDir(), "src.db"))
|
||||
defer func() { _ = src.Close() }()
|
||||
seedDirectMessageData(t, ctx, src)
|
||||
seedTwitterArchiveData(t, ctx, src)
|
||||
|
||||
repo := filepath.Join(t.TempDir(), "share")
|
||||
manifest, err := Export(ctx, src, Options{RepoPath: repo, Branch: "main"})
|
||||
@ -119,19 +118,14 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
|
||||
require.Equal(t, 1, tableEntry(t, manifest, "channels").Rows)
|
||||
require.Equal(t, 1, tableEntry(t, manifest, "messages").Rows)
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "guilds")), directMessageGuildID)
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "guilds")), twitterArchiveGuildID)
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID)
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), "x:tweets")
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content")
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "local tweet content")
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import")
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "twitter:last_import")
|
||||
|
||||
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = dst.Close() }()
|
||||
seedDirectMessageData(t, ctx, dst)
|
||||
seedTwitterArchiveData(t, ctx, dst)
|
||||
|
||||
_, err = Import(ctx, dst, Options{RepoPath: repo, Branch: "main"})
|
||||
require.NoError(t, err)
|
||||
@ -142,16 +136,9 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
|
||||
guildResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "launch checklist", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, guildResults, 1)
|
||||
twitterResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "local tweet content", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, twitterResults, 1)
|
||||
require.Equal(t, twitterArchiveGuildID, twitterResults[0].GuildID)
|
||||
wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "2026-04-24T15:33:17Z", wiretapState)
|
||||
twitterState, err := dst.GetSyncState(ctx, "twitter:last_import")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "2026-04-24T16:00:00Z", twitterState)
|
||||
}
|
||||
|
||||
func TestExportImportEmbeddingsOptIn(t *testing.T) {
|
||||
@ -587,31 +574,28 @@ func TestShareSmallHelpersAndValidation(t *testing.T) {
|
||||
require.False(t, isNonFastForwardPush("everything up-to-date"))
|
||||
|
||||
query, args := snapshotExportQuery("messages")
|
||||
require.Equal(t, "select * from messages where guild_id not in (?, ?)", query)
|
||||
require.Equal(t, []any{directMessageGuildID, twitterArchiveGuildID}, args)
|
||||
require.Equal(t, "select * from messages where guild_id != ?", query)
|
||||
require.Equal(t, []any{directMessageGuildID}, args)
|
||||
query, args = snapshotExportQuery("sync_state")
|
||||
require.Equal(t, "select * from sync_state where scope not like 'wiretap:%' and scope not like 'twitter:%'", query)
|
||||
require.Equal(t, "select * from sync_state where scope not like 'wiretap:%'", query)
|
||||
require.Nil(t, args)
|
||||
query, args = snapshotExportQuery("custom")
|
||||
require.Equal(t, "select * from custom", query)
|
||||
require.Nil(t, args)
|
||||
|
||||
query, args = snapshotDeleteQuery("channels")
|
||||
require.Equal(t, "delete from channels where guild_id not in (?, ?)", query)
|
||||
require.Equal(t, []any{directMessageGuildID, twitterArchiveGuildID}, args)
|
||||
require.Equal(t, "delete from channels where guild_id != ?", query)
|
||||
require.Equal(t, []any{directMessageGuildID}, args)
|
||||
query, args = snapshotDeleteQuery("message_events")
|
||||
require.Equal(t, "delete from message_events where guild_id not in (?, ?)", query)
|
||||
require.Equal(t, []any{directMessageGuildID, twitterArchiveGuildID}, args)
|
||||
require.Equal(t, "delete from message_events where guild_id != ?", query)
|
||||
require.Equal(t, []any{directMessageGuildID}, args)
|
||||
query, args = snapshotDeleteQuery("custom")
|
||||
require.Equal(t, "delete from custom", query)
|
||||
require.Nil(t, args)
|
||||
|
||||
require.True(t, isDirectMessageSnapshotRow("guilds", map[string]any{"id": directMessageGuildID}))
|
||||
require.True(t, isDirectMessageSnapshotRow("guilds", map[string]any{"id": twitterArchiveGuildID}))
|
||||
require.True(t, isDirectMessageSnapshotRow("channels", map[string]any{"guild_id": directMessageGuildID}))
|
||||
require.True(t, isDirectMessageSnapshotRow("channels", map[string]any{"guild_id": twitterArchiveGuildID}))
|
||||
require.True(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "wiretap:last_import"}))
|
||||
require.True(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "twitter:last_import"}))
|
||||
require.False(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "share:last_import"}))
|
||||
require.False(t, isDirectMessageSnapshotRow("custom", map[string]any{"guild_id": directMessageGuildID}))
|
||||
|
||||
@ -868,50 +852,6 @@ func seedDirectMessageData(t *testing.T, ctx context.Context, s *store.Store) {
|
||||
require.NoError(t, s.SetSyncState(ctx, "wiretap:last_import", now.Format(time.RFC3339)))
|
||||
}
|
||||
|
||||
func seedTwitterArchiveData(t *testing.T, ctx context.Context, s *store.Store) {
|
||||
t.Helper()
|
||||
now := time.Date(2026, 4, 24, 16, 0, 0, 0, time.UTC)
|
||||
require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: twitterArchiveGuildID, Name: "X / Twitter Archive", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "x:tweets", GuildID: twitterArchiveGuildID, Kind: "tweet", Name: "tweets", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMember(ctx, store.MemberRecord{
|
||||
GuildID: twitterArchiveGuildID,
|
||||
UserID: "25401953",
|
||||
Username: "steipete",
|
||||
DisplayName: "Peter",
|
||||
RoleIDsJSON: `[]`,
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertMessages(ctx, []store.MessageMutation{{
|
||||
Record: store.MessageRecord{
|
||||
ID: "x:tweet:1",
|
||||
GuildID: twitterArchiveGuildID,
|
||||
ChannelID: "x:tweets",
|
||||
ChannelName: "tweets",
|
||||
AuthorID: "25401953",
|
||||
AuthorName: "steipete",
|
||||
MessageType: 0,
|
||||
CreatedAt: now.Format(time.RFC3339Nano),
|
||||
Content: "local tweet content",
|
||||
NormalizedContent: "local tweet content",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
EventType: "twitter",
|
||||
PayloadJSON: `{"id":"x:tweet:1"}`,
|
||||
Options: store.WriteOptions{AppendEvent: true},
|
||||
Mentions: []store.MentionEventRecord{{
|
||||
MessageID: "x:tweet:1",
|
||||
GuildID: twitterArchiveGuildID,
|
||||
ChannelID: "x:tweets",
|
||||
AuthorID: "25401953",
|
||||
TargetType: "user",
|
||||
TargetID: "42",
|
||||
TargetName: "alice",
|
||||
EventAt: now.Format(time.RFC3339Nano),
|
||||
}},
|
||||
}}))
|
||||
require.NoError(t, s.SetSyncState(ctx, "twitter:last_import", now.Format(time.RFC3339)))
|
||||
}
|
||||
|
||||
func configureGitUser(t *testing.T, repo string) {
|
||||
t.Helper()
|
||||
// #nosec G204 -- fixed git argv in test setup.
|
||||
|
||||
@ -1,235 +0,0 @@
|
||||
package twitterarchive
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
// classify maps a zip entry name to the kind of archive data it holds:
// "account", "tweets", "likes", or "dms". It returns "" when the name
// matches none of them.
//
// The account file must sit directly under a data/ directory; the other
// kinds are recognized by base name alone.
func classify(path string) string {
	slashed := filepath.ToSlash(path)
	name := filepath.Base(slashed)

	if slashed == "data/account.js" || strings.HasSuffix(slashed, "/data/account.js") {
		return "account"
	}
	// tweets.js, or any tweets-part*.js split file.
	if strings.HasSuffix(name, ".js") && (name == "tweets.js" || strings.HasPrefix(name, "tweets-part")) {
		return "tweets"
	}
	switch name {
	case "like.js", "likes.js":
		return "likes"
	case "direct-messages.js", "direct-messages-group.js":
		return "dms"
	}
	return ""
}
|
||||
|
||||
// readZipText opens file, reads its entire decompressed content, and returns
// it as a string. An error from opening or reading is returned unchanged;
// an error from closing the entry is ignored.
func readZipText(file *zip.File) (string, error) {
	reader, err := file.Open()
	if err != nil {
		return "", err
	}
	defer func() { _ = reader.Close() }()

	var text strings.Builder
	_, err = io.Copy(&text, reader)
	if err != nil {
		return "", err
	}
	return text.String(), nil
}
|
||||
|
||||
// parseArchiveArray decodes the JSON array assigned in an archive JS file of
// the form `<name> = [ ... ]`, with an optional trailing semicolon.
//
// Everything up to and including the first "=" is discarded. Numbers are
// decoded as json.Number, so numeric values keep their original digits.
// It returns an error when there is no "=" or when the payload does not
// decode as an array of objects.
func parseArchiveArray(content string) ([]map[string]any, error) {
	_, assigned, found := strings.Cut(content, "=")
	if !found {
		return nil, errors.New("missing assignment")
	}
	payload := strings.TrimSuffix(strings.TrimSpace(assigned), ";")

	decoder := json.NewDecoder(strings.NewReader(payload))
	decoder.UseNumber()
	var records []map[string]any
	err := decoder.Decode(&records)
	if err != nil {
		return nil, err
	}
	return records, nil
}
|
||||
|
||||
func dmAttachments(messageID, channelID, authorID string, raw map[string]any) []store.AttachmentRecord {
|
||||
urls := stringSlice(raw["mediaUrls"])
|
||||
attachments := make([]store.AttachmentRecord, 0, len(urls))
|
||||
for i, url := range urls {
|
||||
attachments = append(attachments, store.AttachmentRecord{
|
||||
AttachmentID: fmt.Sprintf("x:dm:%s:media:%d", messageID, i),
|
||||
MessageID: "x:dm:" + messageID,
|
||||
GuildID: GuildID,
|
||||
ChannelID: channelID,
|
||||
AuthorID: authorID,
|
||||
Filename: filepath.Base(url),
|
||||
URL: url,
|
||||
})
|
||||
}
|
||||
return attachments
|
||||
}
|
||||
|
||||
// participantsForMessage returns the sorted, de-duplicated set of non-empty
// ids drawn from senderID, recipientID, and the "-"-separated parts of
// conversationID.
func participantsForMessage(conversationID, senderID, recipientID string) []string {
	candidates := append([]string{senderID, recipientID}, strings.Split(conversationID, "-")...)
	seen := make(map[string]struct{}, len(candidates))
	ids := make([]string, 0, len(candidates))
	for _, id := range candidates {
		if id == "" {
			continue
		}
		if _, dup := seen[id]; dup {
			continue
		}
		seen[id] = struct{}{}
		ids = append(ids, id)
	}
	sort.Strings(ids)
	return ids
}
|
||||
|
||||
func conversationName(conversationID, selfID string) string {
|
||||
parts := strings.Split(conversationID, "-")
|
||||
if len(parts) == 2 {
|
||||
for _, part := range parts {
|
||||
if part != "" && part != selfID {
|
||||
return "dm-" + shortID(part)
|
||||
}
|
||||
}
|
||||
}
|
||||
return "group-" + shortID(conversationID)
|
||||
}
|
||||
|
||||
func authorLabel(id string, acc account) string {
|
||||
if id == acc.ID && acc.Username != "" {
|
||||
return acc.Username
|
||||
}
|
||||
return "user-" + shortID(id)
|
||||
}
|
||||
|
||||
// parseTime parses value, after trimming surrounding whitespace, using the
// first layout that matches: RFC 3339 with nanoseconds, RFC 3339, or
// "Mon Jan 02 15:04:05 -0700 2006". It returns the zero time when no layout
// matches.
func parseTime(value string) time.Time {
	trimmed := strings.TrimSpace(value)
	layouts := [...]string{time.RFC3339Nano, time.RFC3339, "Mon Jan 02 15:04:05 -0700 2006"}
	for i := range layouts {
		parsed, err := time.Parse(layouts[i], trimmed)
		if err != nil {
			continue
		}
		return parsed
	}
	return time.Time{}
}
|
||||
|
||||
// twitterSnowflakeTime converts a decimal id into a UTC time by shifting the
// id right by 22 bits and adding the result, as milliseconds, to the fixed
// epoch 1288834974657 (Unix milliseconds). It returns the zero time when id
// is not a positive base-10 int64.
func twitterSnowflakeTime(id string) time.Time {
	const (
		twitterEpochMs = int64(1288834974657)
		timestampShift = 22
	)
	raw, err := strconv.ParseInt(id, 10, 64)
	if err == nil && raw > 0 {
		return time.UnixMilli((raw >> timestampShift) + twitterEpochMs).UTC()
	}
	return time.Time{}
}
|
||||
|
||||
// sortedFiles returns a copy of files ordered by entry name. The input
// slice is left untouched.
func sortedFiles(files []*zip.File) []*zip.File {
	var ordered []*zip.File
	ordered = append(ordered, files...)
	byName := func(a, b int) bool { return ordered[a].Name < ordered[b].Name }
	sort.Slice(ordered, byName)
	return ordered
}
|
||||
|
||||
func sortedChannels(channels map[string]store.ChannelRecord) []store.ChannelRecord {
|
||||
out := make([]store.ChannelRecord, 0, len(channels))
|
||||
for _, ch := range channels {
|
||||
out = append(out, ch)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool { return out[i].ID < out[j].ID })
|
||||
return out
|
||||
}
|
||||
|
||||
func sortedMembers(members map[string]store.MemberRecord) []store.MemberRecord {
|
||||
out := make([]store.MemberRecord, 0, len(members))
|
||||
for _, member := range members {
|
||||
out = append(out, member)
|
||||
}
|
||||
sort.Slice(out, func(i, j int) bool { return out[i].UserID < out[j].UserID })
|
||||
return out
|
||||
}
|
||||
|
||||
// memberKey builds the map key for a member by joining guildID and userID
// with a NUL byte.
func memberKey(guildID, userID string) string {
	const separator = "\x00"
	return guildID + separator + userID
}
|
||||
|
||||
// prefixedTweetID returns id with the "x:tweet:" prefix, or "" when id is
// empty so that a missing id stays empty.
func prefixedTweetID(id string) string {
	if id != "" {
		return "x:tweet:" + id
	}
	return ""
}
|
||||
|
||||
// shortID returns the last eight bytes of id, or id itself when it is eight
// bytes or shorter.
func shortID(id string) string {
	const keep = 8
	if excess := len(id) - keep; excess > 0 {
		return id[excess:]
	}
	return id
}
|
||||
|
||||
// rawJSON returns value encoded as JSON, falling back to `{}` when the value
// cannot be marshalled.
func rawJSON(value any) string {
	if encoded, err := json.Marshal(value); err == nil {
		return string(encoded)
	}
	return `{}`
}
|
||||
|
||||
// stringValue converts a decoded JSON value to a string.
//
// A string is returned with surrounding whitespace trimmed, a json.Number as
// its literal text, and a float64 as its integer part in base 10. Any other
// type, including nil, yields "".
func stringValue(value any) string {
	if text, ok := value.(string); ok {
		return strings.TrimSpace(text)
	}
	if number, ok := value.(json.Number); ok {
		return number.String()
	}
	if float, ok := value.(float64); ok {
		return strconv.FormatInt(int64(float), 10)
	}
	return ""
}
|
||||
|
||||
func firstString(values ...any) string {
|
||||
for _, value := range values {
|
||||
if s := stringValue(value); s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func firstAny(values ...any) any {
|
||||
for _, value := range values {
|
||||
if stringValue(value) != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func nestedString(raw map[string]any, key, nested string) string {
|
||||
child, _ := raw[key].(map[string]any)
|
||||
return stringValue(child[nested])
|
||||
}
|
||||
|
||||
func stringSlice(value any) []string {
|
||||
raw, ok := value.([]any)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
out := make([]string, 0, len(raw))
|
||||
for _, item := range raw {
|
||||
if s := stringValue(item); s != "" {
|
||||
out = append(out, s)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
@ -1,396 +0,0 @@
|
||||
package twitterarchive
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"html"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
// Identity of the synthetic guild under which imported archive rows are
// stored.
const (
	// GuildID is the guild id assigned to every imported record.
	GuildID = "x"
	// GuildName is the display name of that guild.
	GuildName = "X / Twitter Archive"
)

// Fixed channel ids and sync-state scopes used by the importer.
const (
	tweetsChannelID = "x:tweets"
	likesChannelID  = "x:likes"
	syncScope       = "twitter:last_import"
	archiveScope    = "twitter:last_archive"
)
|
||||
|
||||
// Options configures a single Import run.
type Options struct {
	// Path is the location of the archive zip; Import rejects a blank path.
	Path string
	// DryRun parses the archive and reports stats without writing to the store.
	DryRun bool
	// Now supplies the clock for the stats timestamps; nil means time.Now.
	Now func() time.Time
}
|
||||
|
||||
// Stats summarizes one Import run. Field order and JSON names are part of
// the printed output.
type Stats struct {
	// Path is the archive path the run was given.
	Path string `json:"path"`
	// FilesScanned counts recognized entries that were read and parsed.
	FilesScanned int `json:"files_scanned"`
	// Accounts is 1 once an account record has been parsed.
	Accounts int `json:"accounts"`
	// Tweets and Likes accumulate the counts returned by their parsers.
	Tweets int `json:"tweets"`
	Likes  int `json:"likes"`
	// DMConversations and DMMessages accumulate the DM parser's counts.
	DMConversations int `json:"dm_conversations"`
	DMMessages      int `json:"dm_messages"`
	// Skipped counts recognized entries that could not be read or parsed.
	Skipped int `json:"skipped"`
	// DryRun mirrors Options.DryRun and is omitted from JSON when false.
	DryRun bool `json:"dry_run,omitempty"`
	// StartedAt and FinishedAt are UTC timestamps taken from the run's clock.
	StartedAt  time.Time `json:"started_at"`
	FinishedAt time.Time `json:"finished_at"`
}
|
||||
|
||||
// account holds the fields read from the archive's account record.
type account struct {
	// Text fields copied from the account record; ID and Username must both
	// be non-empty for the record to be accepted by parseAccount.
	ID, Username, DisplayName, Email, CreatedAt string
	// Raw is the decoded account object the fields were read from.
	Raw map[string]any
}
|
||||
|
||||
type importSnapshot struct {
|
||||
account account
|
||||
channels map[string]store.ChannelRecord
|
||||
members map[string]store.MemberRecord
|
||||
messages []store.MessageMutation
|
||||
conversations map[string]struct{}
|
||||
}
|
||||
|
||||
func Import(ctx context.Context, st *store.Store, opts Options) (Stats, error) {
|
||||
if st == nil && !opts.DryRun {
|
||||
return Stats{}, errors.New("store is required")
|
||||
}
|
||||
if strings.TrimSpace(opts.Path) == "" {
|
||||
return Stats{}, errors.New("archive path is required")
|
||||
}
|
||||
now := opts.Now
|
||||
if now == nil {
|
||||
now = time.Now
|
||||
}
|
||||
stats := Stats{Path: opts.Path, DryRun: opts.DryRun, StartedAt: now().UTC()}
|
||||
reader, err := zip.OpenReader(opts.Path)
|
||||
if err != nil {
|
||||
stats.FinishedAt = now().UTC()
|
||||
return stats, fmt.Errorf("open twitter archive: %w", err)
|
||||
}
|
||||
defer func() { _ = reader.Close() }()
|
||||
|
||||
snap := importSnapshot{
|
||||
channels: map[string]store.ChannelRecord{},
|
||||
members: map[string]store.MemberRecord{},
|
||||
conversations: map[string]struct{}{},
|
||||
}
|
||||
snap.channels[tweetsChannelID] = channel(tweetsChannelID, "tweets", "tweet")
|
||||
snap.channels[likesChannelID] = channel(likesChannelID, "likes", "like")
|
||||
|
||||
for _, file := range sortedFiles(reader.File) {
|
||||
if ctx.Err() != nil {
|
||||
return stats, ctx.Err()
|
||||
}
|
||||
kind := classify(file.Name)
|
||||
if kind == "" {
|
||||
continue
|
||||
}
|
||||
content, err := readZipText(file)
|
||||
if err != nil {
|
||||
stats.Skipped++
|
||||
continue
|
||||
}
|
||||
records, err := parseArchiveArray(content)
|
||||
if err != nil {
|
||||
stats.Skipped++
|
||||
continue
|
||||
}
|
||||
stats.FilesScanned++
|
||||
switch kind {
|
||||
case "account":
|
||||
if parseAccount(records, &snap) {
|
||||
stats.Accounts = 1
|
||||
}
|
||||
case "tweets":
|
||||
stats.Tweets += parseTweets(records, &snap)
|
||||
case "likes":
|
||||
stats.Likes += parseLikes(records, &snap, now().UTC())
|
||||
case "dms":
|
||||
conversations, messages := parseDMs(records, &snap)
|
||||
stats.DMConversations += conversations
|
||||
stats.DMMessages += messages
|
||||
}
|
||||
}
|
||||
if snap.account.ID != "" {
|
||||
snap.members[memberKey(GuildID, snap.account.ID)] = memberFromAccount(snap.account)
|
||||
}
|
||||
for i := range snap.messages {
|
||||
if snap.messages[i].Record.AuthorID == "" && snap.account.ID != "" {
|
||||
snap.messages[i].Record.AuthorID = snap.account.ID
|
||||
snap.messages[i].Record.AuthorName = snap.account.Username
|
||||
}
|
||||
}
|
||||
if !opts.DryRun {
|
||||
if err := writeSnapshot(ctx, st, snap, opts.Path); err != nil {
|
||||
return stats, err
|
||||
}
|
||||
}
|
||||
stats.FinishedAt = now().UTC()
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func writeSnapshot(ctx context.Context, st *store.Store, snap importSnapshot, path string) error {
|
||||
if err := st.UpsertGuild(ctx, store.GuildRecord{ID: GuildID, Name: GuildName, RawJSON: rawJSON(map[string]any{"platform": "x"})}); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, member := range sortedMembers(snap.members) {
|
||||
if err := st.UpsertMember(ctx, member); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, ch := range sortedChannels(snap.channels) {
|
||||
if err := st.UpsertChannel(ctx, ch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
const chunkSize = 1000
|
||||
for start := 0; start < len(snap.messages); start += chunkSize {
|
||||
end := start + chunkSize
|
||||
if end > len(snap.messages) {
|
||||
end = len(snap.messages)
|
||||
}
|
||||
if err := st.UpsertMessages(ctx, snap.messages[start:end]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := st.SetSyncState(ctx, syncScope, time.Now().UTC().Format(time.RFC3339Nano)); err != nil {
|
||||
return err
|
||||
}
|
||||
return st.SetSyncState(ctx, archiveScope, path)
|
||||
}
|
||||
|
||||
func parseAccount(records []map[string]any, snap *importSnapshot) bool {
|
||||
for _, record := range records {
|
||||
raw, ok := record["account"].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
acc := account{
|
||||
ID: stringValue(raw["accountId"]),
|
||||
Username: stringValue(raw["username"]),
|
||||
DisplayName: firstString(raw["accountDisplayName"], raw["name"]),
|
||||
Email: stringValue(raw["email"]),
|
||||
CreatedAt: stringValue(raw["createdAt"]),
|
||||
Raw: raw,
|
||||
}
|
||||
if acc.ID == "" || acc.Username == "" {
|
||||
continue
|
||||
}
|
||||
snap.account = acc
|
||||
snap.members[memberKey(GuildID, acc.ID)] = memberFromAccount(acc)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func parseTweets(records []map[string]any, snap *importSnapshot) int {
|
||||
count := 0
|
||||
for _, record := range records {
|
||||
raw, ok := record["tweet"].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
id := firstString(raw["id_str"], raw["id"])
|
||||
text := html.UnescapeString(stringValue(raw["full_text"]))
|
||||
createdAt := parseTime(firstString(raw["created_at"], raw["createdAt"]))
|
||||
if id == "" || text == "" || createdAt.IsZero() {
|
||||
continue
|
||||
}
|
||||
authorID := firstString(raw["author_id"], raw["user_id_str"], nestedString(raw, "user", "id_str"), snap.account.ID)
|
||||
authorName := snap.account.Username
|
||||
msg := store.MessageMutation{
|
||||
Record: store.MessageRecord{
|
||||
ID: "x:tweet:" + id,
|
||||
GuildID: GuildID,
|
||||
ChannelID: tweetsChannelID,
|
||||
ChannelName: "tweets",
|
||||
AuthorID: authorID,
|
||||
AuthorName: authorName,
|
||||
MessageType: 0,
|
||||
CreatedAt: createdAt.UTC().Format(time.RFC3339Nano),
|
||||
Content: text,
|
||||
NormalizedContent: text,
|
||||
ReplyToMessageID: prefixedTweetID(firstString(raw["in_reply_to_status_id_str"], raw["in_reply_to_status_id"])),
|
||||
HasAttachments: hasMedia(raw),
|
||||
RawJSON: rawJSON(map[string]any{"platform": "x", "type": "tweet", "tweet": raw}),
|
||||
},
|
||||
Mentions: tweetMentions(id, authorID, createdAt, raw),
|
||||
}
|
||||
snap.messages = append(snap.messages, msg)
|
||||
count++
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func parseLikes(records []map[string]any, snap *importSnapshot, fallback time.Time) int {
|
||||
count := 0
|
||||
for _, record := range records {
|
||||
raw, ok := record["like"].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
tweetID := stringValue(raw["tweetId"])
|
||||
text := html.UnescapeString(stringValue(raw["fullText"]))
|
||||
if tweetID == "" || text == "" {
|
||||
continue
|
||||
}
|
||||
createdAt := twitterSnowflakeTime(tweetID)
|
||||
if createdAt.IsZero() {
|
||||
createdAt = fallback
|
||||
}
|
||||
msg := store.MessageMutation{Record: store.MessageRecord{
|
||||
ID: "x:like:" + tweetID,
|
||||
GuildID: GuildID,
|
||||
ChannelID: likesChannelID,
|
||||
ChannelName: "likes",
|
||||
AuthorID: "x:liked",
|
||||
AuthorName: "liked",
|
||||
MessageType: 0,
|
||||
CreatedAt: createdAt.UTC().Format(time.RFC3339Nano),
|
||||
Content: text,
|
||||
NormalizedContent: text,
|
||||
RawJSON: rawJSON(map[string]any{"platform": "x", "type": "like", "like": raw}),
|
||||
}}
|
||||
snap.members[memberKey(GuildID, "x:liked")] = store.MemberRecord{GuildID: GuildID, UserID: "x:liked", Username: "liked", DisplayName: "Liked Tweets", RoleIDsJSON: `[]`, RawJSON: `{}`}
|
||||
snap.messages = append(snap.messages, msg)
|
||||
count++
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func parseDMs(records []map[string]any, snap *importSnapshot) (int, int) {
|
||||
conversations := 0
|
||||
messages := 0
|
||||
for _, record := range records {
|
||||
rawConv, ok := record["dmConversation"].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
conversationID := stringValue(rawConv["conversationId"])
|
||||
rawMessages, ok := rawConv["messages"].([]any)
|
||||
if conversationID == "" || !ok {
|
||||
continue
|
||||
}
|
||||
channelID := "x:dm:" + conversationID
|
||||
if _, seen := snap.conversations[conversationID]; !seen {
|
||||
snap.conversations[conversationID] = struct{}{}
|
||||
snap.channels[channelID] = channel(channelID, conversationName(conversationID, snap.account.ID), "dm")
|
||||
conversations++
|
||||
}
|
||||
for _, item := range rawMessages {
|
||||
rawMessage, ok := item.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
messageCreate, ok := rawMessage["messageCreate"].(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
id := stringValue(messageCreate["id"])
|
||||
text := html.UnescapeString(stringValue(messageCreate["text"]))
|
||||
createdAt := parseTime(stringValue(messageCreate["createdAt"]))
|
||||
senderID := stringValue(messageCreate["senderId"])
|
||||
if id == "" || createdAt.IsZero() || (text == "" && len(stringSlice(messageCreate["mediaUrls"])) == 0) {
|
||||
continue
|
||||
}
|
||||
participants := participantsForMessage(conversationID, senderID, stringValue(messageCreate["recipientId"]))
|
||||
for _, participant := range participants {
|
||||
upsertGenericMember(snap, participant)
|
||||
}
|
||||
attachments := dmAttachments(id, channelID, senderID, messageCreate)
|
||||
snap.messages = append(snap.messages, store.MessageMutation{
|
||||
Record: store.MessageRecord{
|
||||
ID: "x:dm:" + id,
|
||||
GuildID: GuildID,
|
||||
ChannelID: channelID,
|
||||
ChannelName: snap.channels[channelID].Name,
|
||||
AuthorID: senderID,
|
||||
AuthorName: authorLabel(senderID, snap.account),
|
||||
MessageType: 0,
|
||||
CreatedAt: createdAt.UTC().Format(time.RFC3339Nano),
|
||||
Content: text,
|
||||
NormalizedContent: text,
|
||||
HasAttachments: len(attachments) > 0,
|
||||
RawJSON: rawJSON(map[string]any{"platform": "x", "type": "dm", "conversation_id": conversationID, "message": messageCreate}),
|
||||
},
|
||||
Attachments: attachments,
|
||||
})
|
||||
messages++
|
||||
}
|
||||
}
|
||||
return conversations, messages
|
||||
}
|
||||
|
||||
func channel(id, name, kind string) store.ChannelRecord {
|
||||
return store.ChannelRecord{ID: id, GuildID: GuildID, Kind: kind, Name: name, RawJSON: rawJSON(map[string]any{"platform": "x", "kind": kind})}
|
||||
}
|
||||
|
||||
func memberFromAccount(acc account) store.MemberRecord {
|
||||
return store.MemberRecord{
|
||||
GuildID: GuildID,
|
||||
UserID: acc.ID,
|
||||
Username: acc.Username,
|
||||
DisplayName: acc.DisplayName,
|
||||
JoinedAt: acc.CreatedAt,
|
||||
RoleIDsJSON: `[]`,
|
||||
RawJSON: rawJSON(map[string]any{"platform": "x", "account": acc.Raw, "email": acc.Email}),
|
||||
}
|
||||
}
|
||||
|
||||
func upsertGenericMember(snap *importSnapshot, id string) {
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
if snap.account.ID == id {
|
||||
snap.members[memberKey(GuildID, id)] = memberFromAccount(snap.account)
|
||||
return
|
||||
}
|
||||
key := memberKey(GuildID, id)
|
||||
if _, ok := snap.members[key]; ok {
|
||||
return
|
||||
}
|
||||
snap.members[key] = store.MemberRecord{GuildID: GuildID, UserID: id, Username: "user-" + shortID(id), RoleIDsJSON: `[]`, RawJSON: rawJSON(map[string]any{"platform": "x"})}
|
||||
}
|
||||
|
||||
func tweetMentions(tweetID, authorID string, createdAt time.Time, raw map[string]any) []store.MentionEventRecord {
|
||||
entities, _ := raw["entities"].(map[string]any)
|
||||
rawMentions, _ := entities["user_mentions"].([]any)
|
||||
var mentions []store.MentionEventRecord
|
||||
for _, item := range rawMentions {
|
||||
mention, ok := item.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
targetID := stringValue(firstAny(mention["id_str"], mention["id"]))
|
||||
targetName := firstString(mention["screen_name"], mention["name"])
|
||||
if targetID == "" && targetName == "" {
|
||||
continue
|
||||
}
|
||||
mentions = append(mentions, store.MentionEventRecord{
|
||||
MessageID: "x:tweet:" + tweetID,
|
||||
GuildID: GuildID,
|
||||
ChannelID: tweetsChannelID,
|
||||
AuthorID: authorID,
|
||||
TargetType: "user",
|
||||
TargetID: targetID,
|
||||
TargetName: targetName,
|
||||
EventAt: createdAt.UTC().Format(time.RFC3339Nano),
|
||||
})
|
||||
}
|
||||
return mentions
|
||||
}
|
||||
|
||||
// hasMedia reports whether the raw tweet object has a non-empty
// extended_entities.media list. A missing or differently typed value at
// either level counts as no media.
func hasMedia(raw map[string]any) bool {
	ext, ok := raw["extended_entities"].(map[string]any)
	if !ok {
		return false
	}
	items, ok := ext["media"].([]any)
	return ok && len(items) > 0
}
|
||||
@ -1,121 +0,0 @@
|
||||
package twitterarchive
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestImportTwitterArchiveWritesSearchableMessages(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
archivePath := filepath.Join(dir, "twitter.zip")
|
||||
writeTestArchive(t, archivePath)
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{
|
||||
Path: archivePath,
|
||||
Now: func() time.Time { return time.Date(2026, 4, 26, 12, 0, 0, 0, time.UTC) },
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.Accounts)
|
||||
require.Equal(t, 1, stats.Tweets)
|
||||
require.Equal(t, 1, stats.Likes)
|
||||
require.Equal(t, 1, stats.DMConversations)
|
||||
require.Equal(t, 1, stats.DMMessages)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "pull requests", GuildIDs: []string{GuildID}, Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, "x:tweet:1952542067017584782", results[0].MessageID)
|
||||
require.Equal(t, "steipete", results[0].AuthorName)
|
||||
|
||||
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "secret roadmap", GuildIDs: []string{GuildID}, Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, "x:dm:1052590933307461636", results[0].MessageID)
|
||||
|
||||
cursor, err := st.GetSyncState(ctx, archiveScope)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, archivePath, cursor)
|
||||
}
|
||||
|
||||
func TestImportTwitterArchiveDryRunDoesNotWrite(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
archivePath := filepath.Join(dir, "twitter.zip")
|
||||
writeTestArchive(t, archivePath)
|
||||
|
||||
stats, err := Import(ctx, nil, Options{Path: archivePath, DryRun: true})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.Tweets)
|
||||
require.True(t, stats.DryRun)
|
||||
}
|
||||
|
||||
func writeTestArchive(t *testing.T, path string) {
|
||||
t.Helper()
|
||||
file, err := os.Create(path)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
zw := zip.NewWriter(file)
|
||||
defer func() { require.NoError(t, zw.Close()) }()
|
||||
|
||||
writeZipEntry(t, zw, "data/account.js", `window.YTD.account.part0 = [{
|
||||
"account": {
|
||||
"email": "steipete@gmail.com",
|
||||
"username": "steipete",
|
||||
"accountId": "25401953",
|
||||
"createdAt": "2009-03-19T22:54:05.000Z",
|
||||
"accountDisplayName": "Peter Steinberger"
|
||||
}
|
||||
}]`)
|
||||
writeZipEntry(t, zw, "data/tweets.js", `window.YTD.tweets.part0 = [{
|
||||
"tweet": {
|
||||
"id_str": "1952542067017584782",
|
||||
"created_at": "Tue Aug 05 01:27:59 +0000 2025",
|
||||
"full_text": "Getting pull requests with zero user testing aint it.",
|
||||
"entities": {"user_mentions": [{"screen_name": "alice", "id_str": "42"}]}
|
||||
}
|
||||
}]`)
|
||||
writeZipEntry(t, zw, "data/like.js", `window.YTD.like.part0 = [{
|
||||
"like": {
|
||||
"tweetId": "1952539858771275983",
|
||||
"fullText": "Liked archive import smoke"
|
||||
}
|
||||
}]`)
|
||||
writeZipEntry(t, zw, "data/direct-messages.js", `window.YTD.direct_messages.part0 = [{
|
||||
"dmConversation": {
|
||||
"conversationId": "929-25401953",
|
||||
"messages": [{
|
||||
"messageCreate": {
|
||||
"recipientId": "929",
|
||||
"senderId": "25401953",
|
||||
"id": "1052590933307461636",
|
||||
"createdAt": "2018-10-17T16:03:29.391Z",
|
||||
"text": "secret roadmap",
|
||||
"mediaUrls": [],
|
||||
"urls": []
|
||||
}
|
||||
}]
|
||||
}
|
||||
}]`)
|
||||
}
|
||||
|
||||
func writeZipEntry(t *testing.T, zw *zip.Writer, name, body string) {
|
||||
t.Helper()
|
||||
w, err := zw.Create(name)
|
||||
require.NoError(t, err)
|
||||
_, err = w.Write([]byte(body))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user