feat: support Notion data sources and database maintenance

This commit is contained in:
Vincent Koc 2026-04-22 22:11:02 -07:00 committed by GitHub
parent 0a1d9b1992
commit 796a5a03d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 457 additions and 30 deletions

View File

@ -21,9 +21,11 @@ to without holding Notion credentials.
- read-only local desktop cache ingestion from macOS Notion
- official API page/block/user/comment ingestion
- Notion database metadata and row ingestion through the official API
- current Notion data-source API support plus legacy database endpoint support
- normalized Markdown export organized by space and page path
- CSV/TSV export for crawled Notion database rows
- compressed JSONL git-share snapshots plus import/update workflows
- archive status and SQLite maintenance commands
- read-only SQL access for ad hoc inspection
## Install
@ -43,6 +45,7 @@ Use the local Notion Desktop cache:
```bash
notcrawl init
notcrawl doctor
notcrawl status
notcrawl sync --source desktop
notcrawl export-md
notcrawl search "launch plan"
@ -69,6 +72,8 @@ Default paths:
- `init` writes a starter config
- `doctor` checks config, SQLite, desktop cache, and token presence
- `status` prints archive counts, last sync time, and database/WAL size
- `maintain` rebuilds FTS, optimizes SQLite indexes, and can run `VACUUM`
- `sync` ingests from `desktop`, `api`, or `all`
- `export-md` renders normalized Markdown files from SQLite
- `databases` lists crawled Notion databases

16
SPEC.md
View File

@ -22,12 +22,13 @@ V1 scope:
- read-only desktop snapshot ingestion
- official Notion API sync
- pages and blocks
- databases/data sources as collections
- databases/data sources as collections, including current data-source API endpoints
- database rows as pages linked to their collection
- comments and discussions where available
- users and spaces/workspaces
- FTS5 search over rendered page/comment text
- raw SQL access
- archive status and SQLite maintenance commands
- Markdown export
- CSV/TSV export for database rows
- git-backed archive publishing and subscription
@ -72,10 +73,17 @@ API sync uses `NOTION_TOKEN` by default. It must:
5. obey `Retry-After` on rate limits
6. store raw JSON plus normalized rows
New configs should use the current Notion API version. Existing configs pinned
to legacy `2022-06-28` must continue using deprecated database query endpoints.
## SQLite Archive
SQLite is canonical. Markdown is generated output.
Store startup must enable WAL, foreign keys, a busy timeout, normal
synchronous writes, in-memory temp storage, and the crawler query indexes needed
for common page, collection, comment, raw-record, and sync-state lookups.
Core tables:
- `spaces`
@ -135,9 +143,9 @@ SQLite without requiring Notion credentials.
## Database Export
API sync discovers databases visible to the integration, stores database
metadata in `collections`, queries each database for row pages, and links those
pages through `pages.collection_id`.
API sync discovers databases/data sources visible to the integration, stores
metadata in `collections`, queries each collection for row pages, and links
those pages through `pages.collection_id`.
`export-db` renders row properties into delimited text:

View File

@ -66,6 +66,10 @@ func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
switch cmd {
case "doctor":
return runDoctor(ctx, stdout, cfg)
case "status":
return runStatus(ctx, stdout, cfg)
case "maintain":
return runMaintain(ctx, stdout, cfg, cmdArgs)
case "sync":
return runSync(ctx, stdout, cfg, cmdArgs)
case "export-md":
@ -109,7 +113,12 @@ func runDoctor(ctx context.Context, stdout io.Writer, cfg config.Config) error {
"api_token_env": cfg.Notion.API.TokenEnv,
"api_token_present": cfg.APIToken() != "",
}
_ = ctx
status, err := st.Status(ctx)
if err != nil {
return err
}
report["status"] = status
report["api_version"] = cfg.Notion.API.Version
b, err := json.MarshalIndent(report, "", " ")
if err != nil {
return err
@ -118,6 +127,47 @@ func runDoctor(ctx context.Context, stdout io.Writer, cfg config.Config) error {
return nil
}
func runStatus(ctx context.Context, stdout io.Writer, cfg config.Config) error {
st, err := store.Open(cfg.DBPath)
if err != nil {
return err
}
defer st.Close()
status, err := st.Status(ctx)
if err != nil {
return err
}
b, err := json.MarshalIndent(status, "", " ")
if err != nil {
return err
}
fmt.Fprintln(stdout, string(b))
return nil
}
func runMaintain(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
fs := flag.NewFlagSet("maintain", flag.ContinueOnError)
vacuum := fs.Bool("vacuum", false, "run VACUUM after rebuilding and optimizing indexes")
if err := fs.Parse(args); err != nil {
return err
}
st, err := store.Open(cfg.DBPath)
if err != nil {
return err
}
defer st.Close()
summary, err := st.Optimize(ctx, *vacuum)
if err != nil {
return err
}
b, err := json.MarshalIndent(summary, "", " ")
if err != nil {
return err
}
fmt.Fprintln(stdout, string(b))
return nil
}
func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
fs := flag.NewFlagSet("sync", flag.ContinueOnError)
source := fs.String("source", "all", "source: desktop, api, all")
@ -405,6 +455,8 @@ Global flags:
Commands:
init Write a starter config
doctor Check config, database, desktop cache, and token
status Show archive counts and database size
maintain [--vacuum] Rebuild FTS and optimize SQLite indexes
sync --source desktop Ingest Notion Desktop cache
sync --source api Ingest through the official Notion API
sync --source all Run enabled sources

View File

@ -10,7 +10,7 @@ path = ""
enabled = true
token_env = "NOTION_TOKEN"
base_url = "https://api.notion.com/v1"
version = "2022-06-28"
version = "2026-03-11"
[share]
remote = ""

View File

@ -14,6 +14,7 @@ import (
const (
defaultDirName = ".notcrawl"
defaultDesktopPath = "~/Library/Application Support/Notion/notion.db"
defaultAPIVersion = "2026-03-11"
)
type Config struct {
@ -60,7 +61,7 @@ func Default() Config {
Enabled: true,
TokenEnv: "NOTION_TOKEN",
BaseURL: "https://api.notion.com/v1",
Version: "2022-06-28",
Version: defaultAPIVersion,
},
},
Share: ShareConfig{
@ -150,7 +151,7 @@ func (c *Config) Resolve() error {
c.Notion.API.BaseURL = "https://api.notion.com/v1"
}
if strings.TrimSpace(c.Notion.API.Version) == "" {
c.Notion.API.Version = "2022-06-28"
c.Notion.API.Version = defaultAPIVersion
}
if strings.TrimSpace(c.Share.Branch) == "" {
c.Share.Branch = "main"

View File

@ -0,0 +1,10 @@
package config
import "testing"
func TestDefaultUsesCurrentNotionAPIVersion(t *testing.T) {
cfg := Default()
if cfg.Notion.API.Version != "2026-03-11" {
t.Fatalf("unexpected API version: %s", cfg.Notion.API.Version)
}
}

View File

@ -41,7 +41,7 @@ func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
c.BaseURL = "https://api.notion.com/v1"
}
if c.Version == "" {
c.Version = "2022-06-28"
c.Version = "2026-03-11"
}
if c.HTTP == nil {
c.HTTP = http.DefaultClient
@ -73,12 +73,12 @@ func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
s.Blocks += count
s.Comments += comments
}
databases, err := c.searchDatabases(ctx)
collections, err := c.searchCollections(ctx)
if err != nil {
return s, err
}
for _, database := range databases {
rows, err := c.ingestDatabase(ctx, st, database)
for _, collection := range collections {
rows, err := c.ingestCollection(ctx, st, collection)
if err != nil {
return s, err
}
@ -145,8 +145,8 @@ func (c Client) searchPages(ctx context.Context) ([]obj, error) {
return c.searchObjects(ctx, "page")
}
func (c Client) searchDatabases(ctx context.Context) ([]obj, error) {
return c.searchObjects(ctx, "database")
func (c Client) searchCollections(ctx context.Context) ([]obj, error) {
return c.searchObjects(ctx, c.collectionSearchType())
}
func (c Client) searchObjects(ctx context.Context, objectType string) ([]obj, error) {
@ -236,21 +236,25 @@ func (c Client) ingestPage(ctx context.Context, st *store.Store, page obj, opts
return blocks, comments, nil
}
func (c Client) ingestDatabase(ctx context.Context, st *store.Store, database obj) (int, error) {
id := database.string("id")
raw := notiontext.MarshalRaw(database)
parent := database.mapObj("parent")
name := notiontext.Plain(database["title"])
func (c Client) ingestCollection(ctx context.Context, st *store.Store, collection obj) (int, error) {
id := collection.string("id")
raw := notiontext.MarshalRaw(collection)
parent := collection.mapObj("parent")
if len(parent) == 0 {
parent = collection.mapObj("database_parent")
}
parentID := firstNonEmpty(parent.string("database_id"), parent.string("page_id"), parent.string("block_id"), parent.string("workspace"))
name := notiontext.Plain(collection["title"])
if name == "" {
name = id
}
if err := st.UpsertCollection(ctx, store.Collection{
ID: id,
SpaceID: parent.string("workspace"),
ParentID: firstNonEmpty(parent.string("page_id"), parent.string("block_id"), parent.string("workspace")),
ParentID: parentID,
Name: name,
SchemaJSON: marshalAny(database["properties"]),
FormatJSON: marshalAny(database),
SchemaJSON: marshalAny(collection["properties"]),
FormatJSON: marshalAny(collection),
RawJSON: raw,
Source: SourceName,
SyncedAt: store.NowMS(),
@ -258,15 +262,15 @@ func (c Client) ingestDatabase(ctx context.Context, st *store.Store, database ob
return 0, err
}
if err := st.UpsertRawRecord(ctx, store.RawRecord{
Source: SourceName, RecordTable: "database", RecordID: id, ParentID: parent.string("page_id"),
Source: SourceName, RecordTable: c.collectionSearchType(), RecordID: id, ParentID: parentID,
SpaceID: parent.string("workspace"), RawJSON: raw, SyncedAt: store.NowMS(),
}); err != nil {
return 0, err
}
return c.queryDatabase(ctx, st, id)
return c.queryCollection(ctx, st, id)
}
func (c Client) queryDatabase(ctx context.Context, st *store.Store, databaseID string) (int, error) {
func (c Client) queryCollection(ctx context.Context, st *store.Store, collectionID string) (int, error) {
var count int
cursor := ""
for {
@ -275,7 +279,7 @@ func (c Client) queryDatabase(ctx context.Context, st *store.Store, databaseID s
body["start_cursor"] = cursor
}
var resp obj
path := fmt.Sprintf("/databases/%s/query", url.PathEscape(databaseID))
path := fmt.Sprintf("%s/%s/query", c.collectionQueryBasePath(), url.PathEscape(collectionID))
if err := c.do(ctx, http.MethodPost, path, body, &resp); err != nil {
return count, err
}
@ -284,7 +288,15 @@ func (c Client) queryDatabase(ctx context.Context, st *store.Store, databaseID s
if !ok {
continue
}
if _, _, err := c.ingestPage(ctx, st, obj(m), ingestPageOptions{CollectionID: databaseID}); err != nil {
if itemType := obj(m).string("object"); itemType != "" && itemType != "page" {
if itemType == c.collectionSearchType() {
if _, err := c.ingestCollection(ctx, st, obj(m)); err != nil {
return count, err
}
}
continue
}
if _, _, err := c.ingestPage(ctx, st, obj(m), ingestPageOptions{CollectionID: collectionID}); err != nil {
return count, err
}
count++
@ -299,6 +311,24 @@ func (c Client) queryDatabase(ctx context.Context, st *store.Store, databaseID s
}
}
func (c Client) collectionSearchType() string {
if c.usesDataSourceAPI() {
return "data_source"
}
return "database"
}
func (c Client) collectionQueryBasePath() string {
if c.usesDataSourceAPI() {
return "/data_sources"
}
return "/databases"
}
func (c Client) usesDataSourceAPI() bool {
return c.Version >= "2025-09-03"
}
func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentID, spaceID string) (int, error) {
var count int
cursor := ""

View File

@ -97,3 +97,93 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
t.Fatalf("unexpected rows: %+v", rows)
}
}
func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
switch r.URL.Path {
case "/users":
_, _ = w.Write([]byte(`{"object":"list","results":[],"has_more":false}`))
case "/search":
if got := r.Header.Get("Notion-Version"); got != "2026-03-11" {
t.Fatalf("unexpected Notion-Version: %s", got)
}
var body map[string]any
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
t.Fatal(err)
}
filter := body["filter"].(map[string]any)
switch filter["value"] {
case "page":
_, _ = w.Write([]byte(`{"object":"list","results":[],"has_more":false}`))
case "data_source":
_, _ = w.Write([]byte(`{
"object":"list",
"results":[{
"object":"data_source",
"id":"ds1",
"title":[{"plain_text":"Roadmap"}],
"parent":{"type":"database_id","database_id":"db1"},
"database_parent":{"type":"page_id","page_id":"page-parent"},
"properties":{
"Name":{"id":"title","type":"title","title":{}},
"Status":{"id":"status","type":"select","select":{}}
}
}],
"has_more":false
}`))
default:
t.Fatalf("unexpected search filter: %v", filter["value"])
}
case "/data_sources/ds1/query":
_, _ = w.Write([]byte(`{
"object":"list",
"results":[{
"object":"page",
"id":"page1",
"created_time":"2026-01-01T00:00:00Z",
"last_edited_time":"2026-01-02T00:00:00Z",
"in_trash":false,
"url":"https://notion.so/page1",
"parent":{"type":"data_source_id","data_source_id":"ds1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
"has_more":false
}`))
default:
t.Fatalf("unexpected request: %s %s", r.Method, r.URL.String())
}
}))
defer server.Close()
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {
t.Fatal(err)
}
defer st.Close()
summary, err := (Client{BaseURL: server.URL, Version: "2026-03-11", Token: "secret"}).Sync(context.Background(), st)
if err != nil {
t.Fatal(err)
}
if summary.Databases != 1 || summary.DatabaseRows != 1 {
t.Fatalf("unexpected summary: %+v", summary)
}
collections, err := st.Collections(context.Background())
if err != nil {
t.Fatal(err)
}
if len(collections) != 1 || collections[0].ID != "ds1" || collections[0].ParentID != "db1" {
t.Fatalf("unexpected collections: %+v", collections)
}
rows, err := st.CollectionPages(context.Background(), "ds1")
if err != nil {
t.Fatal(err)
}
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "ds1" {
t.Fatalf("unexpected rows: %+v", rows)
}
}

View File

@ -16,19 +16,28 @@ import (
const schemaVersion = 1
type Store struct {
db *sql.DB
db *sql.DB
path string
}
func Open(path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, err
}
db, err := sql.Open("sqlite", path)
if err := ensureDBFile(path); err != nil {
return nil, err
}
db, err := sql.Open("sqlite", sqliteDSN(path))
if err != nil {
return nil, err
}
db.SetMaxOpenConns(1)
st := &Store{db: db}
db.SetMaxIdleConns(1)
if err := db.PingContext(context.Background()); err != nil {
_ = db.Close()
return nil, err
}
st := &Store{db: db, path: path}
if err := st.init(context.Background()); err != nil {
_ = db.Close()
return nil, err
@ -36,18 +45,85 @@ func Open(path string) (*Store, error) {
return st, nil
}
func sqliteDSN(path string) string {
pragmas := "_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)"
if path == ":memory:" {
return "file::memory:?cache=shared&" + pragmas
}
if strings.HasPrefix(path, "file:") {
sep := "?"
if strings.Contains(path, "?") {
sep = "&"
}
return path + sep + pragmas
}
return "file:" + path + "?" + pragmas
}
func ensureDBFile(path string) error {
if path == ":memory:" || strings.HasPrefix(path, "file:") {
return nil
}
if _, err := os.Stat(path); err == nil {
return os.Chmod(path, 0o600)
} else if !errors.Is(err, os.ErrNotExist) {
return err
}
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
if err != nil && !errors.Is(err, os.ErrExist) {
return err
}
if file != nil {
if err := file.Close(); err != nil {
return err
}
}
return nil
}
func (s *Store) DB() *sql.DB {
return s.db
}
func (s *Store) Close() error {
if s == nil || s.db == nil {
return nil
}
return s.db.Close()
}
type Status struct {
DBPath string `json:"db_path"`
DBBytes int64 `json:"db_bytes"`
WALBytes int64 `json:"wal_bytes"`
Spaces int `json:"spaces"`
Users int `json:"users"`
Pages int `json:"pages"`
Blocks int `json:"blocks"`
Collections int `json:"collections"`
Comments int `json:"comments"`
RawRecords int `json:"raw_records"`
LastSyncAt int64 `json:"last_sync_at"`
}
type MaintenanceSummary struct {
RebuiltFTS bool `json:"rebuilt_fts"`
Optimized bool `json:"optimized"`
Analyzed bool `json:"analyzed"`
Vacuumed bool `json:"vacuumed"`
DBBytes int64 `json:"db_bytes"`
WALBytes int64 `json:"wal_bytes"`
Message string `json:"message"`
}
func (s *Store) init(ctx context.Context) error {
stmts := []string{
`pragma foreign_keys = on`,
`pragma journal_mode = wal`,
`pragma synchronous = normal`,
`pragma temp_store = memory`,
`pragma mmap_size = 268435456`,
`pragma busy_timeout = 5000`,
`create table if not exists meta (key text primary key, value text not null)`,
`create table if not exists spaces (
id text primary key,
@ -82,6 +158,10 @@ func (s *Store) init(ctx context.Context) error {
raw_json text,
synced_at integer not null
)`,
`create index if not exists pages_collection_id on pages(collection_id)`,
`create index if not exists pages_parent_id on pages(parent_id)`,
`create index if not exists pages_last_edited_time on pages(last_edited_time desc)`,
`create index if not exists pages_source_synced_at on pages(source, synced_at desc)`,
`create table if not exists blocks (
id text primary key,
page_id text,
@ -101,6 +181,7 @@ func (s *Store) init(ctx context.Context) error {
synced_at integer not null
)`,
`create index if not exists blocks_page_id on blocks(page_id)`,
`create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`,
`create index if not exists blocks_parent_id on blocks(parent_id)`,
`create table if not exists collections (
id text primary key,
@ -113,6 +194,8 @@ func (s *Store) init(ctx context.Context) error {
source text not null,
synced_at integer not null
)`,
`create index if not exists collections_parent_id on collections(parent_id)`,
`create index if not exists collections_name on collections(name)`,
`create table if not exists comments (
id text primary key,
page_id text,
@ -127,6 +210,8 @@ func (s *Store) init(ctx context.Context) error {
source text not null,
synced_at integer not null
)`,
`create index if not exists comments_page_id on comments(page_id)`,
`create index if not exists comments_created_time on comments(created_time, id)`,
`create table if not exists raw_records (
source text not null,
record_table text not null,
@ -137,6 +222,7 @@ func (s *Store) init(ctx context.Context) error {
synced_at integer not null,
primary key (source, record_table, record_id)
)`,
`create index if not exists raw_records_parent on raw_records(parent_id, record_table)`,
`create table if not exists sync_state (
source text not null,
entity_type text not null,
@ -145,6 +231,7 @@ func (s *Store) init(ctx context.Context) error {
synced_at integer not null,
primary key (source, entity_type, entity_id)
)`,
`create index if not exists sync_state_synced_at on sync_state(synced_at desc)`,
`create virtual table if not exists page_fts using fts5(page_id unindexed, title, body)`,
`create virtual table if not exists comment_fts using fts5(comment_id unindexed, page_id unindexed, body)`,
}
@ -391,3 +478,71 @@ func (s *Store) RebuildFTS(ctx context.Context) error {
_, err = s.db.ExecContext(ctx, `insert into comment_fts(comment_id, page_id, body) select id, page_id, text from comments where alive = 1`)
return err
}
func (s *Store) Status(ctx context.Context) (Status, error) {
status := Status{DBPath: s.path}
counts := []struct {
query string
dest *int
}{
{`select count(*) from spaces`, &status.Spaces},
{`select count(*) from users`, &status.Users},
{`select count(*) from pages`, &status.Pages},
{`select count(*) from blocks`, &status.Blocks},
{`select count(*) from collections`, &status.Collections},
{`select count(*) from comments`, &status.Comments},
{`select count(*) from raw_records`, &status.RawRecords},
}
for _, count := range counts {
if err := s.db.QueryRowContext(ctx, count.query).Scan(count.dest); err != nil {
return Status{}, err
}
}
if err := s.db.QueryRowContext(ctx, `select coalesce(max(synced_at), 0) from sync_state`).Scan(&status.LastSyncAt); err != nil {
return Status{}, err
}
status.DBBytes = fileSize(s.path)
status.WALBytes = fileSize(s.path + "-wal")
return status, nil
}
func (s *Store) Optimize(ctx context.Context, vacuum bool) (MaintenanceSummary, error) {
if err := s.RebuildFTS(ctx); err != nil {
return MaintenanceSummary{}, err
}
for _, stmt := range []string{
`insert into page_fts(page_fts) values('optimize')`,
`insert into comment_fts(comment_fts) values('optimize')`,
`pragma optimize`,
`analyze`,
} {
if _, err := s.db.ExecContext(ctx, stmt); err != nil {
return MaintenanceSummary{}, err
}
}
if vacuum {
if _, err := s.db.ExecContext(ctx, `vacuum`); err != nil {
return MaintenanceSummary{}, err
}
}
return MaintenanceSummary{
RebuiltFTS: true,
Optimized: true,
Analyzed: true,
Vacuumed: vacuum,
DBBytes: fileSize(s.path),
WALBytes: fileSize(s.path + "-wal"),
Message: "database maintenance complete",
}, nil
}
func fileSize(path string) int64 {
if path == "" || path == ":memory:" || strings.HasPrefix(path, "file:") {
return 0
}
info, err := os.Stat(path)
if err != nil {
return 0
}
return info.Size()
}

View File

@ -2,6 +2,7 @@ package store
import (
"context"
"os"
"path/filepath"
"testing"
)
@ -31,3 +32,78 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) {
t.Fatalf("expected page1, got %q", results[0].ID)
}
}
func TestStoreStatusAndOptimize(t *testing.T) {
path := filepath.Join(t.TempDir(), "notcrawl.db")
st, err := Open(path)
if err != nil {
t.Fatal(err)
}
defer st.Close()
ctx := context.Background()
now := NowMS()
if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}); err != nil {
t.Fatal(err)
}
if err := st.UpsertBlock(ctx, Block{ID: "block1", PageID: "page1", Type: "text", Text: "maintenance keeps search sharp", Alive: true, Source: "test", SyncedAt: now}); err != nil {
t.Fatal(err)
}
if err := st.SetSyncState(ctx, "test", "workspace", "default", "done"); err != nil {
t.Fatal(err)
}
status, err := st.Status(ctx)
if err != nil {
t.Fatal(err)
}
if status.Pages != 1 || status.Blocks != 1 || status.LastSyncAt == 0 || status.DBBytes == 0 {
t.Fatalf("unexpected status: %+v", status)
}
summary, err := st.Optimize(ctx, false)
if err != nil {
t.Fatal(err)
}
if !summary.RebuiltFTS || !summary.Optimized || !summary.Analyzed || summary.Vacuumed {
t.Fatalf("unexpected maintenance summary: %+v", summary)
}
results, err := st.Search(ctx, "maintenance", 10)
if err != nil {
t.Fatal(err)
}
if len(results) != 1 || results[0].ID != "page1" {
t.Fatalf("unexpected search results after optimize: %+v", results)
}
}
func TestStoreOpenAppliesSQLitePragmasAndPrivateFileMode(t *testing.T) {
path := filepath.Join(t.TempDir(), "notcrawl.db")
st, err := Open(path)
if err != nil {
t.Fatal(err)
}
defer st.Close()
info, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
if info.Mode().Perm()&0o077 != 0 {
t.Fatalf("database should not be group/world-readable: %s", info.Mode().Perm())
}
var journalMode string
if err := st.DB().QueryRow(`pragma journal_mode`).Scan(&journalMode); err != nil {
t.Fatal(err)
}
if journalMode != "wal" {
t.Fatalf("expected WAL journal mode, got %q", journalMode)
}
var busyTimeout int
if err := st.DB().QueryRow(`pragma busy_timeout`).Scan(&busyTimeout); err != nil {
t.Fatal(err)
}
if busyTimeout != 5000 {
t.Fatalf("expected busy_timeout=5000, got %d", busyTimeout)
}
}