From c4be70e52191acbbd0abd091f94a48c0e53a2a83 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 1 May 2026 16:09:36 -0700 Subject: [PATCH] feat(cli): add crawlkit control surface --- go.mod | 2 +- go.sum | 2 + internal/cli/admin_commands.go | 41 ++++++++++++-- internal/cli/cli.go | 48 ++++++++++++++-- internal/cli/control_commands.go | 96 ++++++++++++++++++++++++++++++++ internal/cli/output.go | 4 ++ internal/cli/tui_commands.go | 17 +++++- internal/store/store.go | 16 ++++++ 8 files changed, 215 insertions(+), 11 deletions(-) create mode 100644 internal/cli/control_commands.go diff --git a/go.mod b/go.mod index 70055c6..d72d20e 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,7 @@ require ( github.com/ncruces/go-strftime v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect - github.com/vincentkoc/crawlkit v0.3.1 + github.com/vincentkoc/crawlkit v0.3.2 golang.org/x/crypto v0.50.0 // indirect golang.org/x/tools v0.44.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/go.sum b/go.sum index 4fdb2b0..6b31c09 100644 --- a/go.sum +++ b/go.sum @@ -75,6 +75,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/vincentkoc/crawlkit v0.3.1 h1:z5q7s+oAkdlgGjdT1N/CsWkc4GZ2uNqCmVDtTd2tQSM= github.com/vincentkoc/crawlkit v0.3.1/go.mod h1:Zp6k0f6owZ81wccG26jPbLSDGmfjoxPdzgPXZcUpmW4= +github.com/vincentkoc/crawlkit v0.3.2 h1:/K8GZvgGtYtZY3iaLPhXw+N50sOYrnaQO+egHD9HcAE= +github.com/vincentkoc/crawlkit v0.3.2/go.mod h1:Zp6k0f6owZ81wccG26jPbLSDGmfjoxPdzgPXZcUpmW4= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs= diff --git a/internal/cli/admin_commands.go b/internal/cli/admin_commands.go index 5ccbe9b..e6b0871 100644 --- a/internal/cli/admin_commands.go +++ b/internal/cli/admin_commands.go @@ -17,6 +17,7 @@ import ( "github.com/openclaw/discrawl/internal/discord" "github.com/openclaw/discrawl/internal/discorddesktop" "github.com/openclaw/discrawl/internal/embed" + "github.com/openclaw/discrawl/internal/share" "github.com/openclaw/discrawl/internal/store" "github.com/openclaw/discrawl/internal/syncer" ) @@ -314,16 +315,37 @@ func (r *runtime) runWiretap(args []string) error { } func (r *runtime) runStatus(args []string) error { - if len(args) != 0 { + fs := flag.NewFlagSet("status", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { return usageErr(errors.New("status takes no arguments")) } + if *jsonOut { + r.json = true + } dbPath, err := config.ExpandPath(r.cfg.DBPath) if err != nil { return configErr(err) } - status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID()) - if err != nil { - return err + status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()} + if r.store != nil { + status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID()) + if err != nil { + return err + } + } + if r.json { + needsUpdate := false + if r.store != nil && r.cfg.ShareEnabled() { + if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil { + needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter) + } + } + return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate)) } return r.print(status) } @@ -384,9 +406,18 @@ func (r *runtime) runEmbed(args []string) error { } func (r *runtime) runDoctor(args []string) error { - if len(args) != 0 { + fs := flag.NewFlagSet("doctor", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { return usageErr(errors.New("doctor takes no arguments")) } + if *jsonOut { + r.json = true + } report := map[string]any{ "config_path": r.configPath, } diff --git a/internal/cli/cli.go b/internal/cli/cli.go index a67279e..359bf83 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -47,6 +47,10 @@ func ExitCode(err error) int { } func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error { + if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" { + printUsage(stdout) + return nil + } global := flag.NewFlagSet("discrawl", flag.ContinueOnError) global.SetOutput(io.Discard) configPath := global.String("config", "", "") @@ -66,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error { return nil } rest := global.Args() - if len(rest) == 0 || rest[0] == "help" { + if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" { printUsage(stdout) return nil } + if rest[0] == "version" { + _, _ = io.WriteString(stdout, version+"\n") + return nil + } level := slog.LevelInfo if *quiet { level = slog.LevelError @@ -129,6 +137,8 @@ type attachmentTextConfigurer interface { func (r *runtime) dispatch(rest []string) error { switch rest[0] { + case "metadata": + return r.runMetadata(rest[1:]) case "init": return r.runInit(rest[1:]) case "sync": @@ -141,12 +151,13 @@ func (r *runtime) dispatch(rest []string) error { return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) }) case "wiretap": return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) }) + case "tap", "cache-import": + return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) }) case "search": autoShareUpdate := !hasBoolFlag(rest[1:], "--dm") return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runSearch(rest[1:]) }) case "tui": - autoShareUpdate := !hasBoolFlag(rest[1:], "--dm") - return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runTUI(rest[1:]) }) + return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) }) case "messages": if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") { return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) }) @@ -170,7 +181,7 @@ func (r *runtime) dispatch(rest []string) error { case "channels": return r.withLocalStoreLocked(true, func() error { return r.runChannels(rest[1:]) }) case "status": - return r.withLocalStoreLocked(true, func() error { return r.runStatus(rest[1:]) }) + return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) }) case "report": return r.withLocalStoreLocked(true, func() error { return r.runReport(rest[1:]) }) case "publish": @@ -252,6 +263,35 @@ func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn f return fn() } +func (r *runtime) withLocalStoreReadOnly(fn func() error) error { + cfg, err := config.Load(r.configPath) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return configErr(err) + } + cfg = config.Default() + if err := cfg.Normalize(); err != nil { + return configErr(err) + } + } + dbPath, err := config.ExpandPath(cfg.DBPath) + if err != nil { + return configErr(err) + } + r.cfg = cfg + var openErr error + r.store, openErr = store.OpenReadOnly(r.ctx, dbPath) + if openErr != nil { + if errors.Is(openErr, os.ErrNotExist) { + r.store = nil + return fn() + } + return dbErr(openErr) + } + defer func() { _ = r.store.Close() }() + return fn() +} + func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error { return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn) } diff --git a/internal/cli/control_commands.go b/internal/cli/control_commands.go new file mode 100644 index 0000000..681bd0f --- /dev/null +++ b/internal/cli/control_commands.go @@ -0,0 +1,96 @@ +package cli + +import ( + "errors" + "flag" + "fmt" + "io" + "os" + "time" + + "github.com/openclaw/discrawl/internal/config" + "github.com/openclaw/discrawl/internal/store" + "github.com/vincentkoc/crawlkit/control" +) + +func (r *runtime) runMetadata(args []string) error { + fs := flag.NewFlagSet("metadata", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { + return usageErr(errors.New("metadata takes flags only")) + } + if *jsonOut { + r.json = true + } + cfg := config.Default() + manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl") + manifest.Description = "Local-first Discord archive crawler." + manifest.Branding = control.Branding{SymbolName: "bubble.left.and.bubble.right.fill", AccentColor: "#5865f2", BundleIdentifier: "com.hnc.Discord"} + manifest.Paths = control.Paths{ + DefaultConfig: config.ResolvePath(""), + ConfigEnv: config.DefaultConfigEnv, + DefaultDatabase: cfg.DBPath, + DefaultCache: cfg.CacheDir, + DefaultLogs: cfg.LogDir, + DefaultShare: cfg.Share.RepoPath, + } + manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"} + manifest.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"discord", "desktop-cache", "sqlite", "git-share"}} + manifest.Commands = map[string]control.Command{ + "status": {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true}, + "doctor": {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true}, + "sync": {Title: "Sync", Argv: []string{"discrawl", "--json", "sync"}, JSON: true, Mutates: true}, + "tap": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "tap"}, JSON: true, Mutates: true}, + "cache-import": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "cache-import"}, JSON: true, Mutates: true}, + "wiretap": {Title: "Legacy desktop cache import", Argv: []string{"discrawl", "--json", "wiretap"}, JSON: true, Mutates: true, Legacy: true, Deprecated: true}, + "tui": {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}}, + "tui-json": {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true}, + "publish": {Title: "Publish share", Argv: []string{"discrawl", "--json", "publish"}, JSON: true, Mutates: true}, + "subscribe": {Title: "Subscribe share", Argv: []string{"discrawl", "--json", "subscribe"}, JSON: true, Mutates: true}, + "update": {Title: "Update share", Argv: []string{"discrawl", "--json", "update"}, JSON: true, Mutates: true}, + } + return r.print(manifest) +} + +func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status { + counts := []control.Count{ + control.NewCount("guilds", "Guilds", int64(status.GuildCount)), + control.NewCount("channels", "Channels", int64(status.ChannelCount)), + control.NewCount("threads", "Threads", int64(status.ThreadCount)), + control.NewCount("messages", "Messages", int64(status.MessageCount)), + control.NewCount("members", "Members", int64(status.MemberCount)), + control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)), + } + out := control.NewStatus("discrawl", fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount)) + out.State = "current" + out.ConfigPath = configPath + out.DatabasePath = status.DBPath + out.Counts = counts + if !status.LastSyncAt.IsZero() { + out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339) + } + db := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, counts) + out.DatabaseBytes = db.Bytes + out.WALBytes = fileSize(status.DBPath + "-wal") + out.Databases = []control.Database{db} + out.Share = &control.Share{ + Enabled: cfg.ShareEnabled(), + RepoPath: cfg.Share.RepoPath, + Remote: cfg.Share.Remote, + Branch: cfg.Share.Branch, + NeedsUpdate: shareNeedsUpdate, + } + return out +} + +func fileSize(path string) int64 { + info, err := os.Stat(path) + if err != nil { + return 0 + } + return info.Size() +} diff --git a/internal/cli/output.go b/internal/cli/output.go index 1ac855e..5aa3a99 100644 --- a/internal/cli/output.go +++ b/internal/cli/output.go @@ -100,9 +100,13 @@ Usage: discrawl [global flags] [args] Commands: + metadata + version init sync tail + tap + cache-import wiretap search tui diff --git a/internal/cli/tui_commands.go b/internal/cli/tui_commands.go index b1967f7..271c20e 100644 --- a/internal/cli/tui_commands.go +++ b/internal/cli/tui_commands.go @@ -8,7 +8,7 @@ import ( "github.com/vincentkoc/crawlkit/tui" - "github.com/steipete/discrawl/internal/store" + "github.com/openclaw/discrawl/internal/store" ) func (r *runtime) runTUI(args []string) error { @@ -21,9 +21,13 @@ func (r *runtime) runTUI(args []string) error { dm := fs.Bool("dm", false, "") guildsFlag := fs.String("guilds", "", "") guildFlag := fs.String("guild", "", "") + jsonOut := fs.Bool("json", false, "") if err := fs.Parse(args); err != nil { return usageErr(err) } + if *jsonOut { + r.json = true + } if fs.NArg() != 0 { return usageErr(errors.New("tui takes flags only")) } @@ -34,6 +38,16 @@ func (r *runtime) runTUI(args []string) error { if err != nil { return usageErr(err) } + if r.store == nil { + return tui.Browse(r.ctx, tui.BrowseOptions{ + AppName: "discrawl", + Title: "discrawl archive", + EmptyMessage: "discrawl has no local messages yet", + JSON: r.json, + Layout: tui.LayoutChat, + Stdout: r.stdout, + }) + } rows, err := r.store.ListMessages(r.ctx, store.MessageListOptions{ GuildIDs: guildIDs, Channel: *channel, @@ -50,6 +64,7 @@ func (r *runtime) runTUI(args []string) error { EmptyMessage: "discrawl has no local messages yet", Rows: discordTUIRows(rows), JSON: r.json, + Layout: tui.LayoutChat, Stdout: r.stdout, }) } diff --git a/internal/store/store.go b/internal/store/store.go index 3108e5e..23d1e6e 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -124,6 +124,22 @@ func Open(ctx context.Context, path string) (*Store, error) { return store, nil } +func OpenReadOnly(ctx context.Context, path string) (*Store, error) { + base, err := crawlstore.OpenReadOnly(ctx, path) + if err != nil { + return nil, err + } + store := &Store{db: base.DB(), path: path} + if version, err := store.schemaVersion(ctx); err != nil { + _ = base.Close() + return nil, err + } else if version != storeSchemaVersion { + _ = base.Close() + return nil, fmt.Errorf("database schema version mismatch: got %d want %d", version, storeSchemaVersion) + } + return store, nil +} + func (s *Store) Close() error { if s == nil || s.db == nil { return nil