feat(cli): add crawlkit control surface

This commit is contained in:
Vincent Koc 2026-05-01 16:09:36 -07:00
parent 5e5c401531
commit c4be70e521
No known key found for this signature in database
8 changed files with 215 additions and 11 deletions

2
go.mod
View File

@ -41,7 +41,7 @@ require (
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/vincentkoc/crawlkit v0.3.1
github.com/vincentkoc/crawlkit v0.3.2
golang.org/x/crypto v0.50.0 // indirect
golang.org/x/tools v0.44.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect

2
go.sum
View File

@ -75,6 +75,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vincentkoc/crawlkit v0.3.1 h1:z5q7s+oAkdlgGjdT1N/CsWkc4GZ2uNqCmVDtTd2tQSM=
github.com/vincentkoc/crawlkit v0.3.1/go.mod h1:Zp6k0f6owZ81wccG26jPbLSDGmfjoxPdzgPXZcUpmW4=
github.com/vincentkoc/crawlkit v0.3.2 h1:/K8GZvgGtYtZY3iaLPhXw+N50sOYrnaQO+egHD9HcAE=
github.com/vincentkoc/crawlkit v0.3.2/go.mod h1:Zp6k0f6owZ81wccG26jPbLSDGmfjoxPdzgPXZcUpmW4=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=

View File

@ -17,6 +17,7 @@ import (
"github.com/openclaw/discrawl/internal/discord"
"github.com/openclaw/discrawl/internal/discorddesktop"
"github.com/openclaw/discrawl/internal/embed"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
@ -314,16 +315,37 @@ func (r *runtime) runWiretap(args []string) error {
}
func (r *runtime) runStatus(args []string) error {
if len(args) != 0 {
fs := flag.NewFlagSet("status", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("status takes no arguments"))
}
if *jsonOut {
r.json = true
}
dbPath, err := config.ExpandPath(r.cfg.DBPath)
if err != nil {
return configErr(err)
}
status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()}
if r.store != nil {
status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
}
}
if r.json {
needsUpdate := false
if r.store != nil && r.cfg.ShareEnabled() {
if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil {
needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter)
}
}
return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate))
}
return r.print(status)
}
@ -384,9 +406,18 @@ func (r *runtime) runEmbed(args []string) error {
}
func (r *runtime) runDoctor(args []string) error {
if len(args) != 0 {
fs := flag.NewFlagSet("doctor", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("doctor takes no arguments"))
}
if *jsonOut {
r.json = true
}
report := map[string]any{
"config_path": r.configPath,
}

View File

@ -47,6 +47,10 @@ func ExitCode(err error) int {
}
func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
printUsage(stdout)
return nil
}
global := flag.NewFlagSet("discrawl", flag.ContinueOnError)
global.SetOutput(io.Discard)
configPath := global.String("config", "", "")
@ -66,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
return nil
}
rest := global.Args()
if len(rest) == 0 || rest[0] == "help" {
if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
printUsage(stdout)
return nil
}
if rest[0] == "version" {
_, _ = io.WriteString(stdout, version+"\n")
return nil
}
level := slog.LevelInfo
if *quiet {
level = slog.LevelError
@ -129,6 +137,8 @@ type attachmentTextConfigurer interface {
func (r *runtime) dispatch(rest []string) error {
switch rest[0] {
case "metadata":
return r.runMetadata(rest[1:])
case "init":
return r.runInit(rest[1:])
case "sync":
@ -141,12 +151,13 @@ func (r *runtime) dispatch(rest []string) error {
return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) })
case "wiretap":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "tap", "cache-import":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "search":
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
case "tui":
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runTUI(rest[1:]) })
return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) })
case "messages":
if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") {
return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) })
@ -170,7 +181,7 @@ func (r *runtime) dispatch(rest []string) error {
case "channels":
return r.withLocalStoreLocked(true, func() error { return r.runChannels(rest[1:]) })
case "status":
return r.withLocalStoreLocked(true, func() error { return r.runStatus(rest[1:]) })
return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) })
case "report":
return r.withLocalStoreLocked(true, func() error { return r.runReport(rest[1:]) })
case "publish":
@ -252,6 +263,35 @@ func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn f
return fn()
}
// withLocalStoreReadOnly loads the configuration, opens the local database
// through store.OpenReadOnly, and runs fn with r.cfg and r.store populated.
//
// A config file that does not exist (os.ErrNotExist) is replaced by the
// normalized defaults; any other load failure is returned as a config error.
// A database that does not exist is tolerated as well: fn still runs, but
// with r.store set to nil, so fn must nil-check the store. Every other open
// failure is returned as a database error. When the store was opened it is
// closed after fn returns and the Close error is discarded.
func (r *runtime) withLocalStoreReadOnly(fn func() error) error {
	cfg, loadErr := config.Load(r.configPath)
	switch {
	case loadErr == nil:
		// Loaded from disk; use as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		// No config file yet: fall back to defaults so read-only commands
		// still work on a fresh machine.
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return configErr(normErr)
		}
	default:
		return configErr(loadErr)
	}

	dbPath, pathErr := config.ExpandPath(cfg.DBPath)
	if pathErr != nil {
		return configErr(pathErr)
	}
	r.cfg = cfg

	var openErr error
	if r.store, openErr = store.OpenReadOnly(r.ctx, dbPath); openErr == nil {
		defer func() { _ = r.store.Close() }()
		return fn()
	}
	if !errors.Is(openErr, os.ErrNotExist) {
		return dbErr(openErr)
	}
	// Missing database: run fn without a store.
	r.store = nil
	return fn()
}
// withServicesAuto forwards withDiscord, autoShareUpdate and fn unchanged to
// withServicesAutoLocked, passing a fixed false as its third argument, and
// returns whatever that call returns.
// NOTE(review): the third argument presumably selects the locked variant —
// confirm against withServicesAutoLocked.
func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error {
return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn)
}

View File

@ -0,0 +1,96 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"os"
"time"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
"github.com/vincentkoc/crawlkit/control"
)
// runMetadata implements the "metadata" command: it builds the crawlkit
// control manifest describing discrawl (branding, default paths,
// capabilities, privacy flags and the command table) and prints it with
// r.print.
//
// The only accepted flag is --json, which switches r.json on. Positional
// arguments and unknown flags are reported as usage errors. Paths come from
// config.Default() and config.ResolvePath(""), not from the loaded config.
func (r *runtime) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	wantJSON := flags.Bool("json", false, "")
	if parseErr := flags.Parse(args); parseErr != nil {
		return usageErr(parseErr)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("metadata takes flags only"))
	}
	if *wantJSON {
		r.json = true
	}

	defaults := config.Default()

	// mutating builds the entry shared by every state-changing command:
	// global --json flag first, then the subcommand name.
	mutating := func(title, subcommand string) control.Command {
		return control.Command{
			Title:   title,
			Argv:    []string{"discrawl", "--json", subcommand},
			JSON:    true,
			Mutates: true,
		}
	}
	// wiretap is the same kind of entry, additionally flagged as legacy
	// and deprecated.
	wiretap := mutating("Legacy desktop cache import", "wiretap")
	wiretap.Legacy = true
	wiretap.Deprecated = true

	manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl")
	manifest.Description = "Local-first Discord archive crawler."
	manifest.Branding = control.Branding{
		SymbolName:       "bubble.left.and.bubble.right.fill",
		AccentColor:      "#5865f2",
		BundleIdentifier: "com.hnc.Discord",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
		DefaultShare:    defaults.Share.RepoPath,
	}
	manifest.Capabilities = []string{
		"metadata", "status", "doctor", "sync", "tap",
		"tui", "git-share", "sql", "embeddings",
	}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: true,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"discord", "desktop-cache", "sqlite", "git-share"},
	}
	manifest.Commands = map[string]control.Command{
		// Read-only commands take --json as a subcommand flag.
		"status":   {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true},
		"doctor":   {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true},
		"tui":      {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}},
		"tui-json": {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true},

		"sync":         mutating("Sync", "sync"),
		"tap":          mutating("Import desktop cache", "tap"),
		"cache-import": mutating("Import desktop cache", "cache-import"),
		"wiretap":      wiretap,
		"publish":      mutating("Publish share", "publish"),
		"subscribe":    mutating("Subscribe share", "subscribe"),
		"update":       mutating("Update share", "update"),
	}
	return r.print(manifest)
}
// controlStatus converts a store.Status into the crawlkit control.Status
// payload.
//
// configPath is copied into the result as-is. cfg supplies the share section
// (enabled flag, repo path, remote, branch) and shareNeedsUpdate is copied
// into Share.NeedsUpdate. The same count list is attached both to the
// top-level status and to the single "primary" database entry. State is
// always "current". LastSyncAt is only filled in, as UTC RFC 3339, when the
// store reports a non-zero sync time. WALBytes is the size of the
// "<db path>-wal" file, or 0 when it cannot be stat'ed.
func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status {
	tallies := []control.Count{
		control.NewCount("guilds", "Guilds", int64(status.GuildCount)),
		control.NewCount("channels", "Channels", int64(status.ChannelCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("messages", "Messages", int64(status.MessageCount)),
		control.NewCount("members", "Members", int64(status.MemberCount)),
		control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)),
	}

	summary := fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount)
	result := control.NewStatus("discrawl", summary)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = tallies
	if syncedAt := status.LastSyncAt; !syncedAt.IsZero() {
		result.LastSyncAt = syncedAt.UTC().Format(time.RFC3339)
	}

	primary := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, tallies)
	result.DatabaseBytes = primary.Bytes
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}

	result.Share = &control.Share{
		Enabled:     cfg.ShareEnabled(),
		RepoPath:    cfg.Share.RepoPath,
		Remote:      cfg.Share.Remote,
		Branch:      cfg.Share.Branch,
		NeedsUpdate: shareNeedsUpdate,
	}
	return result
}
// fileSize reports the size in bytes that os.Stat gives for path. Any Stat
// failure (for example a path that does not exist) yields 0 instead of an
// error, so callers can use it for optional files.
func fileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}

View File

@ -100,9 +100,13 @@ Usage:
discrawl [global flags] <command> [args]
Commands:
metadata
version
init
sync
tail
tap
cache-import
wiretap
search
tui

View File

@ -8,7 +8,7 @@ import (
"github.com/vincentkoc/crawlkit/tui"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/store"
)
func (r *runtime) runTUI(args []string) error {
@ -21,9 +21,13 @@ func (r *runtime) runTUI(args []string) error {
dm := fs.Bool("dm", false, "")
guildsFlag := fs.String("guilds", "", "")
guildFlag := fs.String("guild", "", "")
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if *jsonOut {
r.json = true
}
if fs.NArg() != 0 {
return usageErr(errors.New("tui takes flags only"))
}
@ -34,6 +38,16 @@ func (r *runtime) runTUI(args []string) error {
if err != nil {
return usageErr(err)
}
if r.store == nil {
return tui.Browse(r.ctx, tui.BrowseOptions{
AppName: "discrawl",
Title: "discrawl archive",
EmptyMessage: "discrawl has no local messages yet",
JSON: r.json,
Layout: tui.LayoutChat,
Stdout: r.stdout,
})
}
rows, err := r.store.ListMessages(r.ctx, store.MessageListOptions{
GuildIDs: guildIDs,
Channel: *channel,
@ -50,6 +64,7 @@ func (r *runtime) runTUI(args []string) error {
EmptyMessage: "discrawl has no local messages yet",
Rows: discordTUIRows(rows),
JSON: r.json,
Layout: tui.LayoutChat,
Stdout: r.stdout,
})
}

View File

@ -124,6 +124,22 @@ func Open(ctx context.Context, path string) (*Store, error) {
return store, nil
}
// OpenReadOnly opens the database at path through crawlstore.OpenReadOnly
// and wraps its handle in a Store.
//
// The stored schema version must equal storeSchemaVersion. If the version
// cannot be read, or if it differs, the underlying handle is closed and an
// error is returned. Errors from crawlstore.OpenReadOnly and from the
// version lookup are returned unwrapped.
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
	base, err := crawlstore.OpenReadOnly(ctx, path)
	if err != nil {
		return nil, err
	}

	opened := &Store{db: base.DB(), path: path}
	got, err := opened.schemaVersion(ctx)
	if err == nil && got != storeSchemaVersion {
		err = fmt.Errorf("database schema version mismatch: got %d want %d", got, storeSchemaVersion)
	}
	if err != nil {
		// Single cleanup path for both a failed lookup and a mismatch.
		_ = base.Close()
		return nil, err
	}
	return opened, nil
}
func (s *Store) Close() error {
if s == nil || s.db == nil {
return nil