feat: wire notioncrawl cli

This commit is contained in:
Vincent Koc 2026-04-22 15:58:40 -07:00
parent a73fd87ee8
commit 63f6f8e6d2
No known key found for this signature in database

View File

@ -1,15 +1,352 @@
package main
import (
"context"
"database/sql"
"encoding/json"
"flag"
"fmt"
"io"
"os"
"strings"
"github.com/vincentkoc/notioncrawl/internal/config"
"github.com/vincentkoc/notioncrawl/internal/markdown"
"github.com/vincentkoc/notioncrawl/internal/notionapi"
"github.com/vincentkoc/notioncrawl/internal/notiondesktop"
"github.com/vincentkoc/notioncrawl/internal/share"
"github.com/vincentkoc/notioncrawl/internal/store"
)
func main() {
if len(os.Args) > 1 && (os.Args[1] == "-h" || os.Args[1] == "--help") {
fmt.Print("Usage of notioncrawl:\n notioncrawl [global flags] <command> [args]\n")
return
if err := run(context.Background(), os.Args[1:], os.Stdout, os.Stderr); err != nil {
fmt.Fprintln(os.Stderr, "notioncrawl:", err)
os.Exit(1)
}
fmt.Fprintln(os.Stderr, "notioncrawl: implementation in progress")
os.Exit(2)
}
// run parses the global flags, resolves the configuration and dispatches to
// the requested sub-command.
//
// args are the command-line arguments without the program name. Regular
// command output goes to stdout; flag diagnostics and flag-triggered usage go
// to stderr.
//
// It returns nil when help was shown (no command, the "help" command, or
// -h/--help), an error for an unknown command, and otherwise whatever the
// selected sub-command returns.
func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
	global := flag.NewFlagSet("notioncrawl", flag.ContinueOnError)
	global.SetOutput(stderr)
	// Show the full command overview instead of the flag package's default
	// flags-only usage when -h/--help or an invalid flag is given.
	global.Usage = func() { printHelp(stderr) }
	configPath := global.String("config", "", "config file path")
	dbPath := global.String("db", "", "database path override")
	if err := global.Parse(args); err != nil {
		if err == flag.ErrHelp {
			// -h/--help: usage has already been written by Parse.
			return nil
		}
		return err
	}

	rest := global.Args()
	if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
		printHelp(stdout)
		return nil
	}
	cmd := rest[0]
	cmdArgs := rest[1:]

	// "init" is handled before the config is loaded so that it can create
	// the starter file.
	if cmd == "init" {
		path, err := config.WriteStarter(*configPath)
		if err != nil {
			return err
		}
		fmt.Fprintf(stdout, "wrote %s\n", path)
		return nil
	}

	cfg, err := config.Load(*configPath)
	if err != nil {
		return err
	}
	// --db overrides the database path taken from the config.
	if *dbPath != "" {
		cfg.DBPath, err = config.ExpandPath(*dbPath)
		if err != nil {
			return err
		}
	}

	switch cmd {
	case "doctor":
		return runDoctor(ctx, stdout, cfg)
	case "sync":
		return runSync(ctx, stdout, cfg, cmdArgs)
	case "export-md":
		return runExportMarkdown(ctx, stdout, cfg)
	case "search":
		return runSearch(ctx, stdout, cfg, cmdArgs)
	case "sql":
		return runSQL(ctx, stdout, cfg, cmdArgs)
	case "publish":
		return runPublish(ctx, stdout, cfg, cmdArgs)
	case "subscribe":
		return runSubscribe(ctx, stdout, cfg, cmdArgs)
	case "update":
		return runUpdate(ctx, stdout, cfg, cmdArgs)
	default:
		return fmt.Errorf("unknown command %q", cmd)
	}
}
// runDoctor implements the "doctor" command. It checks that the database can
// be opened, inspects the configured Notion Desktop path, and prints a JSON
// report of the resolved paths and API token status to stdout.
func runDoctor(ctx context.Context, stdout io.Writer, cfg config.Config) error {
	_ = ctx // not needed by any call below

	// Opening the store is itself the database check; the handle is not
	// used for anything else.
	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	info, err := notiondesktop.Inspect(cfg.Notion.Desktop.Path)
	if err != nil {
		return err
	}

	encoded, err := json.MarshalIndent(map[string]any{
		"db_path":           cfg.DBPath,
		"cache_dir":         cfg.CacheDir,
		"markdown_dir":      cfg.MarkdownDir,
		"desktop_path":      info.Path,
		"desktop_available": info.Available,
		"desktop_size":      info.SizeBytes,
		"api_token_env":     cfg.Notion.API.TokenEnv,
		"api_token_present": cfg.APIToken() != "",
	}, "", " ")
	if err != nil {
		return err
	}

	fmt.Fprintln(stdout, string(encoded))
	return nil
}
// runSync implements the "sync" command. The -source flag selects what to
// ingest into the store: "desktop", "api", or "all" (the default).
//
// With "all", the desktop source runs only if it is enabled in the config,
// and the API source runs only if it is enabled and a token is present.
// Each source that runs prints a one-line summary to stdout. Any other
// -source value is an error.
func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	flags := flag.NewFlagSet("sync", flag.ContinueOnError)
	source := flags.String("source", "all", "source: desktop, api, all")
	if err := flags.Parse(args); err != nil {
		return err
	}

	st, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer st.Close()

	// syncDesktop ingests the Notion Desktop cache and reports its counts.
	syncDesktop := func() error {
		stats, err := notiondesktop.Ingest(ctx, st, cfg.Notion.Desktop.Path, cfg.CacheDir)
		if err != nil {
			return err
		}
		fmt.Fprintf(stdout, "desktop: pages=%d blocks=%d collections=%d comments=%d snapshot=%s\n", stats.Pages, stats.Blocks, stats.Collections, stats.Comments, stats.Source.Snapshot)
		return nil
	}

	// syncAPI ingests through the Notion API client and reports its counts.
	syncAPI := func() error {
		client := notionapi.Client{
			BaseURL: cfg.Notion.API.BaseURL,
			Version: cfg.Notion.API.Version,
			Token:   cfg.APIToken(),
		}
		stats, err := client.Sync(ctx, st)
		if err != nil {
			return err
		}
		fmt.Fprintf(stdout, "api: users=%d pages=%d blocks=%d comments=%d\n", stats.Users, stats.Pages, stats.Blocks, stats.Comments)
		return nil
	}

	switch *source {
	case "desktop":
		return syncDesktop()
	case "api":
		return syncAPI()
	case "all":
		if cfg.Notion.Desktop.Enabled {
			if err := syncDesktop(); err != nil {
				return err
			}
		}
		if cfg.Notion.API.Enabled && cfg.APIToken() != "" {
			return syncAPI()
		}
		return nil
	default:
		return fmt.Errorf("unknown source %q", *source)
	}
}
// runExportMarkdown implements the "export-md" command. It runs the Markdown
// exporter over the store, writing into cfg.MarkdownDir, and prints the
// number of exported pages to stdout.
func runExportMarkdown(ctx context.Context, stdout io.Writer, cfg config.Config) error {
	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	exporter := markdown.Exporter{Store: db, Dir: cfg.MarkdownDir}
	summary, err := exporter.Export(ctx)
	if err != nil {
		return err
	}

	fmt.Fprintf(stdout, "exported %d pages to %s\n", summary.Pages, cfg.MarkdownDir)
	return nil
}
// runSearch implements the "search" command. The arguments are joined with
// single spaces into one query, and up to 20 results are requested from the
// store. Each hit is printed to stdout as a tab-separated line of kind, ID,
// title and text. An empty argument list is an error.
func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	const maxResults = 20

	if len(args) < 1 {
		return fmt.Errorf("search query required")
	}

	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	query := strings.Join(args, " ")
	hits, err := db.Search(ctx, query, maxResults)
	if err != nil {
		return err
	}

	for _, hit := range hits {
		fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", hit.Kind, hit.ID, hit.Title, hit.Text)
	}
	return nil
}
// runSQL implements the "sql" command. The arguments are joined with single
// spaces into one statement. A statement that fails isReadOnlyQuery is
// refused before the database is opened; otherwise it is executed and the
// result set is printed to stdout as tab-separated text.
func runSQL(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	if len(args) < 1 {
		return fmt.Errorf("sql query required")
	}

	stmt := strings.TrimSpace(strings.Join(args, " "))
	if allowed := isReadOnlyQuery(stmt); !allowed {
		return fmt.Errorf("only read-only select/with/pragma queries are allowed")
	}

	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	result, err := db.DB().QueryContext(ctx, stmt)
	if err != nil {
		return err
	}
	defer result.Close()

	return printRows(stdout, result)
}
// runPublish implements the "publish" command. It runs the Markdown exporter
// into cfg.MarkdownDir, then calls share.Publish with the selected options
// and prints a summary to stdout.
//
// Flags: -remote, -repo and -branch default to the share settings in the
// config; -message sets the commit message; -push pushes after committing;
// -no-commit writes the snapshot without committing.
func runPublish(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	flags := flag.NewFlagSet("publish", flag.ContinueOnError)
	var (
		remote   = flags.String("remote", cfg.Share.Remote, "git remote")
		repo     = flags.String("repo", cfg.Share.RepoPath, "share repo path")
		branch   = flags.String("branch", cfg.Share.Branch, "share branch")
		message  = flags.String("message", "archive: notioncrawl snapshot", "commit message")
		push     = flags.Bool("push", false, "push after commit")
		noCommit = flags.Bool("no-commit", false, "write snapshot without committing")
	)
	if err := flags.Parse(args); err != nil {
		return err
	}

	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	// Run the Markdown export before publishing; its summary is not needed.
	exporter := markdown.Exporter{Store: db, Dir: cfg.MarkdownDir}
	if _, err := exporter.Export(ctx); err != nil {
		return err
	}

	opts := share.PublishOptions{
		RepoPath:    *repo,
		Remote:      *remote,
		Branch:      *branch,
		MarkdownDir: cfg.MarkdownDir,
		Message:     *message,
		Push:        *push,
		Commit:      !*noCommit,
	}
	outcome, err := share.Publish(ctx, db, opts)
	if err != nil {
		return err
	}

	fmt.Fprintf(stdout, "published %d tables to %s committed=%t pushed=%t\n", len(outcome.Manifest.Tables), *repo, outcome.Committed, outcome.Pushed)
	return nil
}
// runSubscribe implements the "subscribe" command. The remote is the first
// positional argument if one is given, otherwise the share remote from the
// config. -repo and -branch default to the share settings in the config.
// It calls share.Subscribe and prints the manifest's table count and
// generation time to stdout.
func runSubscribe(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	flags := flag.NewFlagSet("subscribe", flag.ContinueOnError)
	repo := flags.String("repo", cfg.Share.RepoPath, "share repo path")
	branch := flags.String("branch", cfg.Share.Branch, "share branch")
	if err := flags.Parse(args); err != nil {
		return err
	}

	// A positional REMOTE overrides the configured one.
	remote := cfg.Share.Remote
	if positional := flags.Args(); len(positional) > 0 {
		remote = positional[0]
	}

	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	m, err := share.Subscribe(ctx, db, remote, *repo, *branch)
	if err != nil {
		return err
	}

	fmt.Fprintf(stdout, "subscribed %s tables=%d generated_at=%s\n", remote, len(m.Tables), m.GeneratedAt)
	return nil
}
// runUpdate implements the "update" command. -repo and -branch default to
// the share settings in the config. It calls share.Update and prints the
// manifest's table count and generation time to stdout.
func runUpdate(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	flags := flag.NewFlagSet("update", flag.ContinueOnError)
	repo := flags.String("repo", cfg.Share.RepoPath, "share repo path")
	branch := flags.String("branch", cfg.Share.Branch, "share branch")
	if err := flags.Parse(args); err != nil {
		return err
	}

	db, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer db.Close()

	m, err := share.Update(ctx, db, *repo, *branch)
	if err != nil {
		return err
	}

	fmt.Fprintf(stdout, "updated tables=%d generated_at=%s\n", len(m.Tables), m.GeneratedAt)
	return nil
}
// printRows writes a result set to w as tab-separated text: one header line
// with the column names, then one line per row.
//
// NULL values are written as empty fields, []byte values as their string
// contents, and everything else in fmt's default format. Tabs or newlines
// inside values are not escaped.
//
// It returns the first error from reading the rows or from writing to w.
// The caller remains responsible for closing rows.
func printRows(w io.Writer, rows *sql.Rows) error {
	cols, err := rows.Columns()
	if err != nil {
		return err
	}
	if _, err := fmt.Fprintln(w, strings.Join(cols, "\t")); err != nil {
		return err
	}

	// The scan buffers are allocated once and reused for every row. Scan
	// overwrites each destination on every call, and each row is rendered
	// before the next Scan, so nothing is carried over between rows.
	values := make([]any, len(cols))
	ptrs := make([]any, len(cols))
	for i := range values {
		ptrs[i] = &values[i]
	}
	fields := make([]string, len(cols))

	for rows.Next() {
		if err := rows.Scan(ptrs...); err != nil {
			return err
		}
		for i, value := range values {
			switch x := value.(type) {
			case nil:
				fields[i] = ""
			case []byte:
				fields[i] = string(x)
			default:
				fields[i] = fmt.Sprint(x)
			}
		}
		// One write per row, so a failing writer is noticed right away.
		if _, err := fmt.Fprintln(w, strings.Join(fields, "\t")); err != nil {
			return err
		}
	}
	return rows.Err()
}
// isReadOnlyQuery reports whether query begins with one of the keywords
// SELECT, WITH or PRAGMA (case-insensitive, surrounding whitespace ignored).
//
// The keyword must be a complete token followed by more text, but the
// separator may be any non-identifier character, so multi-line queries
// ("select\n..."), tab-separated ones and forms such as "select*" are
// accepted. A bare keyword with nothing after it is rejected.
//
// NOTE(review): this is a prefix heuristic, not a SQL parser. In SQLite a
// WITH clause may also introduce INSERT/UPDATE/DELETE and some PRAGMA
// statements change settings, so it should not be the only write
// protection; confirm whether the store is opened read-only for this path.
func isReadOnlyQuery(query string) bool {
	trimmed := strings.TrimSpace(query)

	// Find the end of the leading identifier-like token.
	end := strings.IndexFunc(trimmed, func(r rune) bool {
		isIdent := r == '_' ||
			(r >= 'a' && r <= 'z') ||
			(r >= 'A' && r <= 'Z') ||
			(r >= '0' && r <= '9')
		return !isIdent
	})
	if end <= 0 {
		// Empty input, input not starting with a keyword, or a bare
		// keyword with nothing after it.
		return false
	}

	switch strings.ToLower(trimmed[:end]) {
	case "select", "with", "pragma":
		return true
	default:
		return false
	}
}
// printHelp writes the top-level usage text to w: the invocation synopsis,
// the global flags, and one line per supported command.
func printHelp(w io.Writer) {
fmt.Fprint(w, `Usage of notioncrawl:
notioncrawl [global flags] <command> [args]
Global flags:
--config PATH config file path
--db PATH database path override
Commands:
init Write a starter config
doctor Check config, database, desktop cache, and token
sync --source desktop Ingest Notion Desktop cache
sync --source api Ingest through the official Notion API
sync --source all Run enabled sources
export-md Render normalized Markdown from SQLite
search QUERY Search page text
sql QUERY Run read-only SQL
publish [--push] Export data and Markdown into a git share repo
subscribe REMOTE Clone/import a git share repo
update Pull/import a git share repo
`)
}