Compare commits

..

1 Commits

Author SHA1 Message Date
Vincent Koc
b80faa9e7e
fix(markdown): preserve unicode export paths
Some checks failed
Validation / validate (push) Has been cancelled
2026-04-27 11:03:23 -07:00
16 changed files with 150 additions and 1359 deletions

View File

@ -50,7 +50,7 @@ jobs:
echo "RELEASE_VERSION=${TAG#v}" >> "$GITHUB_ENV"
- name: Build release artifacts
uses: goreleaser/goreleaser-action@v7
uses: goreleaser/goreleaser-action@v6
with:
distribution: goreleaser
version: "~> v2"

View File

@ -22,7 +22,7 @@ to without holding Notion credentials.
- official API page/block/user/comment ingestion
- Notion database metadata and row ingestion through the official API
- current Notion data-source API support plus legacy database endpoint support
- normalized Markdown export organized by Unicode-safe workspace, teamspace, and page paths
- normalized Markdown export organized by Unicode-safe space and page paths
- CSV/TSV export for crawled Notion database rows
- compressed JSONL git-share snapshots plus import/update workflows
- archive status, activity reporting, and SQLite maintenance commands

View File

@ -91,7 +91,6 @@ Core tables:
- `spaces`
- `users`
- `teams`
- `pages`
- `blocks`
- `collections`
@ -110,14 +109,9 @@ readable letters, numbers, CJK text, and emoji while replacing filesystem path
separators and unsafe punctuation with dashes:
```text
pages/<space-slug>/<team-slug>/<page-title>-<short-id>.md
pages/<space-slug>/<page-title>-<short-id>.md
```
The team slug is omitted when no teamspace can be resolved.
Each export removes stale generated `.md` files under the Markdown root while
leaving non-Markdown sidecar files alone.
Each file starts with YAML-ish front matter:
```yaml

View File

@ -206,7 +206,7 @@ func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []st
if err != nil {
return err
}
fmt.Fprintf(stdout, "desktop: pages=%d blocks=%d teams=%d collections=%d comments=%d snapshot=%s\n", s.Pages, s.Blocks, s.Teams, s.Collections, s.Comments, s.Source.Snapshot)
fmt.Fprintf(stdout, "desktop: pages=%d blocks=%d collections=%d comments=%d snapshot=%s\n", s.Pages, s.Blocks, s.Collections, s.Comments, s.Source.Snapshot)
case "api":
s, err := notionapi.Client{
BaseURL: cfg.Notion.API.BaseURL,
@ -223,7 +223,7 @@ func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []st
if err != nil {
return err
}
fmt.Fprintf(stdout, "desktop: pages=%d blocks=%d teams=%d collections=%d comments=%d snapshot=%s\n", s.Pages, s.Blocks, s.Teams, s.Collections, s.Comments, s.Source.Snapshot)
fmt.Fprintf(stdout, "desktop: pages=%d blocks=%d collections=%d comments=%d snapshot=%s\n", s.Pages, s.Blocks, s.Collections, s.Comments, s.Source.Snapshot)
}
if cfg.Notion.API.Enabled && cfg.APIToken() != "" {
s, err := notionapi.Client{
@ -327,15 +327,11 @@ func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args []
return err
}
for _, r := range results {
fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", searchField(r.Kind), searchField(r.ID), searchField(r.Title), searchField(r.Text))
fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", r.Kind, r.ID, r.Title, r.Text)
}
return nil
}
// searchField flattens s so it can be printed as one cell of a tab-separated
// record: leading and trailing whitespace is dropped and every internal run of
// whitespace (including tabs and newlines) becomes a single space.
func searchField(s string) string {
	var b strings.Builder
	for i, word := range strings.Fields(s) {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(word)
	}
	return b.String()
}
func runSQL(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
if len(args) == 0 {
return fmt.Errorf("sql query required")

View File

@ -1,10 +0,0 @@
package main
import "testing"
// TestSearchFieldCollapsesRecordSeparators checks that newlines and tabs inside
// a field are folded into single spaces.
func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
	const (
		input = "line one\nline\ttwo line three"
		want  = "line one line two line three"
	)
	if got := searchField(input); got != want {
		t.Fatalf("unexpected field: %q", got)
	}
}

View File

@ -2,13 +2,11 @@ package markdown
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/vincentkoc/notcrawl/internal/notiontext"
@ -39,31 +37,23 @@ func (e Exporter) Export(ctx context.Context) (Summary, error) {
if err != nil {
return Summary{}, err
}
paths, err := newPathResolver(ctx, e.Store)
if err != nil {
return Summary{}, err
}
var s Summary
keep := map[string]bool{}
for _, page := range pages {
path, err := e.writePage(ctx, paths, page)
path, err := e.writePage(ctx, page)
if err != nil {
return s, err
}
keep[filepath.Clean(path)] = true
s.Pages++
s.Files = append(s.Files, path)
}
if err := pruneStaleMarkdown(e.Dir, keep); err != nil {
return s, err
}
return s, nil
}
func (e Exporter) writePage(ctx context.Context, paths pathResolver, page store.Page) (string, error) {
spaceName := paths.spaceName(page.SpaceID)
teamID := paths.pageTeamID(page)
teamName := paths.teamName(teamID)
func (e Exporter) writePage(ctx context.Context, page store.Page) (string, error) {
spaceName, err := e.Store.SpaceName(ctx, page.SpaceID)
if err != nil {
return "", err
}
blocks, err := e.Store.PageBlocks(ctx, page.ID)
if err != nil {
return "", err
@ -75,17 +65,12 @@ func (e Exporter) writePage(ctx context.Context, paths pathResolver, page store.
spaceSlug := notiontext.Slug(spaceName)
titleSlug := maxSlug(notiontext.Slug(page.Title), 96)
name := fmt.Sprintf("%s-%s.md", titleSlug, notiontext.ShortID(page.ID))
parts := []string{e.Dir, spaceSlug}
if teamName != "" {
parts = append(parts, notiontext.Slug(teamName))
}
parts = append(parts, name)
path := filepath.Join(parts...)
path := filepath.Join(e.Dir, spaceSlug, name)
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return "", err
}
var b strings.Builder
writeFrontMatter(&b, page, spaceName, teamID, teamName)
writeFrontMatter(&b, page, spaceName)
if page.Title != "" {
fmt.Fprintf(&b, "# %s\n\n", notiontext.MarkdownEscape(page.Title))
}
@ -107,87 +92,11 @@ func (e Exporter) writePage(ctx context.Context, paths pathResolver, page store.
return path, os.WriteFile(path, []byte(out), 0o644)
}
// pathResolver bundles the lookup tables used to turn a page into an export
// path. All four maps are keyed by record ID.
type pathResolver struct {
	// spaces and teams map an ID to its display name.
	spaces, teams map[string]string
	// blocks and collections map an ID to the parent reference of that record.
	blocks, collections map[string]store.ParentRef
}
// newPathResolver loads the space names, team names, block parents and
// collection parents from st, in that order. The first failing query aborts
// the load and its error is returned with a zero pathResolver.
func newPathResolver(ctx context.Context, st *store.Store) (pathResolver, error) {
	var (
		r   pathResolver
		err error
	)
	if r.spaces, err = st.SpaceNames(ctx); err != nil {
		return pathResolver{}, err
	}
	if r.teams, err = st.TeamNames(ctx); err != nil {
		return pathResolver{}, err
	}
	if r.blocks, err = st.BlockParents(ctx); err != nil {
		return pathResolver{}, err
	}
	if r.collections, err = st.CollectionParents(ctx); err != nil {
		return pathResolver{}, err
	}
	return r, nil
}
// spaceName returns the display name for the space with the given id.
// An empty id yields "default"; an id with no recorded (non-empty) name yields
// "space-" followed by notiontext.ShortID(id).
func (r pathResolver) spaceName(id string) string {
	switch known := r.spaces[id]; {
	case id == "":
		return "default"
	case known != "":
		return known
	default:
		return "space-" + notiontext.ShortID(id)
	}
}
// teamName returns the display name for the team with the given id.
// An empty id yields "" (no team); an id with no recorded (non-empty) name
// yields "team-" followed by notiontext.ShortID(id).
func (r pathResolver) teamName(id string) string {
	if id == "" {
		return ""
	}
	known := r.teams[id]
	if known == "" {
		return "team-" + notiontext.ShortID(id)
	}
	return known
}
// pageTeamID walks the page's parent chain and returns the ID of the team it
// ends at, or "" when no team is reached.
//
// NOTE(review): the visited set is seeded with the bare page ID, whereas
// resolveTeamID records and checks keys of the form "table:id" — confirm the
// seed is meant to match anything.
func (r pathResolver) pageTeamID(page store.Page) string {
	visited := map[string]bool{page.ID: true}
	return r.resolveTeamID(page.ParentTable, page.ParentID, page.CollectionID, visited)
}
// resolveTeamID follows parent references starting at (table, id) until it
// reaches a "team" parent, whose id is returned. It returns "" when the chain
// ends without a team, hits an unknown parent table, or revisits a node
// already present in seen (keys are "table:id"). collectionID is used as the
// id only for a "collection" parent whose own id is empty.
func (r pathResolver) resolveTeamID(table, id, collectionID string, seen map[string]bool) string {
	for {
		if table == "team" {
			return id
		}
		if id == "" && table == "collection" {
			id = collectionID
		}
		if id == "" {
			return ""
		}
		key := table + ":" + id
		if seen[key] {
			return ""
		}
		seen[key] = true

		var parent store.ParentRef
		switch table {
		case "block":
			parent = r.blocks[id]
		case "collection", "database", "data_source":
			parent = r.collections[id]
		default:
			return ""
		}
		// Step up one level; the collection fallback only applies to the
		// starting node.
		table, id, collectionID = parent.Table, parent.ID, ""
	}
}
func writeFrontMatter(b *strings.Builder, page store.Page, spaceName, teamID, teamName string) {
func writeFrontMatter(b *strings.Builder, page store.Page, spaceName string) {
b.WriteString("---\n")
writeKV(b, "id", page.ID)
writeKV(b, "space_id", page.SpaceID)
writeKV(b, "space", spaceName)
writeKV(b, "team_id", teamID)
writeKV(b, "team", teamName)
writeKV(b, "title", page.Title)
writeKV(b, "source", page.Source)
writeKV(b, "notion_url", page.URL)
@ -302,41 +211,6 @@ func fallback(s, fallback string) string {
return fallback
}
// pruneStaleMarkdown removes every ".md" file under root whose cleaned path is
// not a key of keep, then tries to remove each subdirectory of root so that
// directories left empty disappear. Files with other extensions are never
// touched, and root itself is never removed.
//
// A root that does not exist is treated as "nothing to prune" and returns nil,
// matching how pruneGeneratedFiles handles a missing root; previously the
// lstat error from filepath.WalkDir was returned.
func pruneStaleMarkdown(root string, keep map[string]bool) error {
	if _, err := os.Lstat(root); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		return err
	}
	cleanRoot := filepath.Clean(root)
	var dirs []string
	visit := func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		path = filepath.Clean(path)
		if d.IsDir() {
			if path != cleanRoot {
				dirs = append(dirs, path)
			}
			return nil
		}
		if filepath.Ext(path) == ".md" && !keep[path] {
			return os.Remove(path)
		}
		return nil
	}
	if err := filepath.WalkDir(root, visit); err != nil {
		return err
	}
	// A nested directory's path is always longer than its parent's, so
	// longest-first removes children before the directories containing them.
	sort.Slice(dirs, func(i, j int) bool {
		return len(dirs[i]) > len(dirs[j])
	})
	for _, dir := range dirs {
		if err := os.Remove(dir); err != nil && !isIgnorableRemoveDirError(err) {
			return err
		}
	}
	return nil
}

// isIgnorableRemoveDirError reports whether err from removing a directory
// means the directory was already gone (os.ErrNotExist) or still has entries
// (ENOTEMPTY or EEXIST).
func isIgnorableRemoveDirError(err error) bool {
	return errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTEMPTY) || errors.Is(err, syscall.EEXIST)
}
func formatMS(ms int64) string {
if ms <= 0 {
return ""

View File

@ -104,133 +104,3 @@ func TestExporterPreservesUnicodePathNames(t *testing.T) {
t.Fatalf("unexpected export path: %+v, want %s", s.Files, want)
}
}
// TestExporterUsesWorkspaceAndTeamspacePath exports a page parented directly
// by a team and checks that the file lands under <space>/<team>/ and that the
// front matter carries the team ID and name.
func TestExporterUsesWorkspaceAndTeamspacePath(t *testing.T) {
	ctx := context.Background()
	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	must(err)
	defer st.Close()

	syncedAt := store.NowMS()
	must(st.UpsertSpace(ctx, store.Space{ID: "space1", Name: "Acme Org", Source: "test", SyncedAt: syncedAt}))
	must(st.UpsertTeam(ctx, store.Team{ID: "team1", SpaceID: "space1", Name: "Research Lab", Source: "test", SyncedAt: syncedAt}))
	must(st.UpsertPage(ctx, store.Page{ID: "page1", SpaceID: "space1", ParentID: "team1", ParentTable: "team", Title: "Plan", Alive: true, Source: "test", SyncedAt: syncedAt}))

	outDir := t.TempDir()
	summary, err := Exporter{Store: st, Dir: outDir}.Export(ctx)
	must(err)

	want := filepath.Join(outDir, "acme-org", "research-lab", "plan-page1.md")
	if len(summary.Files) != 1 || summary.Files[0] != want {
		t.Fatalf("unexpected export path: %+v, want %s", summary.Files, want)
	}

	content, err := os.ReadFile(want)
	must(err)
	text := string(content)
	hasTeamID := strings.Contains(text, `team_id: "team1"`)
	hasTeamName := strings.Contains(text, `team: "Research Lab"`)
	if !hasTeamID || !hasTeamName {
		t.Fatalf("missing team front matter:\n%s", text)
	}
}
// TestExporterResolvesTeamspaceThroughCollectionParent exports a page whose
// parent is a collection that is itself parented by a team, and checks that
// the team directory still appears in the export path.
func TestExporterResolvesTeamspaceThroughCollectionParent(t *testing.T) {
	ctx := context.Background()
	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	must(err)
	defer st.Close()

	syncedAt := store.NowMS()
	must(st.UpsertSpace(ctx, store.Space{ID: "space1", Name: "Acme Org", Source: "test", SyncedAt: syncedAt}))
	must(st.UpsertTeam(ctx, store.Team{ID: "team1", SpaceID: "space1", Name: "Research Lab", Source: "test", SyncedAt: syncedAt}))
	must(st.UpsertCollection(ctx, store.Collection{ID: "collection1", SpaceID: "space1", ParentID: "team1", ParentTable: "team", Name: "Roadmap", Source: "test", SyncedAt: syncedAt}))
	must(st.UpsertPage(ctx, store.Page{ID: "page1", SpaceID: "space1", ParentID: "collection1", ParentTable: "collection", CollectionID: "collection1", Title: "Row", Alive: true, Source: "test", SyncedAt: syncedAt}))

	outDir := t.TempDir()
	summary, err := Exporter{Store: st, Dir: outDir}.Export(ctx)
	must(err)

	want := filepath.Join(outDir, "acme-org", "research-lab", "row-page1.md")
	if len(summary.Files) != 1 || summary.Files[0] != want {
		t.Fatalf("unexpected export path: %+v, want %s", summary.Files, want)
	}
}
// TestExporterUsesReadableMissingSpaceFallback exports a page whose space has
// no stored record and checks that the directory name is the "space-" prefix
// plus a shortened form of the space ID.
func TestExporterUsesReadableMissingSpaceFallback(t *testing.T) {
	const spaceID = "52f1c029-ec85-4ff5-bd43-c6d6ea9259e0"
	ctx := context.Background()

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	page := store.Page{ID: "page1", SpaceID: spaceID, Title: "Loose", Alive: true, Source: "test", SyncedAt: store.NowMS()}
	if err := st.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}

	outDir := t.TempDir()
	summary, err := Exporter{Store: st, Dir: outDir}.Export(ctx)
	if err != nil {
		t.Fatal(err)
	}
	want := filepath.Join(outDir, "space-52f1c029-ea9259e0", "loose-page1.md")
	if len(summary.Files) != 1 || summary.Files[0] != want {
		t.Fatalf("unexpected export path: %+v, want %s", summary.Files, want)
	}
}
// TestExporterPrunesStaleMarkdown seeds the export directory with a leftover
// Markdown file and a non-Markdown note, runs an export, and checks that only
// the Markdown file is deleted.
func TestExporterPrunesStaleMarkdown(t *testing.T) {
	ctx := context.Background()
	must := func(err error) {
		t.Helper()
		if err != nil {
			t.Fatal(err)
		}
	}

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	must(err)
	defer st.Close()
	must(st.UpsertPage(ctx, store.Page{ID: "page1", Title: "Launch", Alive: true, Source: "test", SyncedAt: store.NowMS()}))

	outDir := t.TempDir()
	oldDir := filepath.Join(outDir, "old")
	must(os.MkdirAll(oldDir, 0o755))
	staleMarkdown := filepath.Join(oldDir, "stale.md")
	must(os.WriteFile(staleMarkdown, []byte("old"), 0o644))
	keepNote := filepath.Join(oldDir, "note.txt")
	must(os.WriteFile(keepNote, []byte("keep"), 0o644))

	_, err = (Exporter{Store: st, Dir: outDir}).Export(ctx)
	must(err)

	if _, statErr := os.Stat(staleMarkdown); !os.IsNotExist(statErr) {
		t.Fatalf("expected stale markdown to be removed, stat err=%v", statErr)
	}
	if _, statErr := os.Stat(keepNote); statErr != nil {
		t.Fatalf("expected non-markdown file to remain: %v", statErr)
	}
}

View File

@ -47,50 +47,45 @@ func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
c.HTTP = http.DefaultClient
}
var s Summary
if err := st.DeferPageFTS(ctx, func() error {
users, err := c.listUsers(ctx)
users, err := c.listUsers(ctx)
if err != nil {
return s, err
}
for _, u := range users {
raw := notiontext.MarshalRaw(u)
if err := st.UpsertUser(ctx, store.User{
ID: u.string("id"), Name: userName(u), Email: userEmail(u), RawJSON: raw, Source: SourceName, SyncedAt: store.NowMS(),
}); err != nil {
return s, err
}
s.Users++
}
pages, err := c.searchPages(ctx)
if err != nil {
return s, err
}
for _, page := range pages {
count, comments, err := c.ingestPage(ctx, st, page, ingestPageOptions{FetchBlocks: true, FetchComments: true})
if err != nil {
return err
return s, err
}
for _, u := range users {
raw := notiontext.MarshalRaw(u)
if err := st.UpsertUser(ctx, store.User{
ID: u.string("id"), Name: userName(u), Email: userEmail(u), RawJSON: raw, Source: SourceName, SyncedAt: store.NowMS(),
}); err != nil {
return err
}
s.Users++
}
pages, err := c.searchPages(ctx)
s.Pages++
s.Blocks += count
s.Comments += comments
}
collections, err := c.searchCollections(ctx)
if err != nil {
return s, err
}
for _, collection := range collections {
rows, err := c.ingestCollection(ctx, st, collection)
if err != nil {
return err
return s, err
}
for _, page := range pages {
count, comments, err := c.ingestPage(ctx, st, page, ingestPageOptions{FetchBlocks: true, FetchComments: true})
if err != nil {
return err
}
s.Pages++
s.Blocks += count
s.Comments += comments
}
collections, err := c.searchCollections(ctx)
if err != nil {
return err
}
for _, collection := range collections {
rows, err := c.ingestCollection(ctx, st, collection)
if err != nil {
return err
}
s.Databases++
s.DatabaseRows += rows
}
if err := st.SetSyncState(ctx, SourceName, "workspace", "default", time.Now().Format(time.RFC3339)); err != nil {
return err
}
return nil
}); err != nil {
s.Databases++
s.DatabaseRows += rows
}
if err := st.SetSyncState(ctx, SourceName, "workspace", "default", time.Now().Format(time.RFC3339)); err != nil {
return s, err
}
return s, nil
@ -254,16 +249,15 @@ func (c Client) ingestCollection(ctx context.Context, st *store.Store, collectio
name = id
}
if err := st.UpsertCollection(ctx, store.Collection{
ID: id,
SpaceID: parent.string("workspace"),
ParentID: parentID,
ParentTable: parent.string("type"),
Name: name,
SchemaJSON: marshalAny(collection["properties"]),
FormatJSON: marshalAny(collection),
RawJSON: raw,
Source: SourceName,
SyncedAt: store.NowMS(),
ID: id,
SpaceID: parent.string("workspace"),
ParentID: parentID,
Name: name,
SchemaJSON: marshalAny(collection["properties"]),
FormatJSON: marshalAny(collection),
RawJSON: raw,
Source: SourceName,
SyncedAt: store.NowMS(),
}); err != nil {
return 0, err
}

View File

@ -7,7 +7,6 @@ import (
"io"
"os"
"path/filepath"
"sort"
"strings"
"time"
@ -17,7 +16,6 @@ import (
)
const SourceName = "desktop"
const desktopSnapshotRetention = 2
type Source struct {
Path string
@ -30,7 +28,6 @@ type Summary struct {
Source Source
Spaces int
Users int
Teams int
Pages int
Blocks int
Collections int
@ -65,34 +62,19 @@ func Ingest(ctx context.Context, st *store.Store, path, cacheDir string) (Summar
}
defer db.Close()
s := Summary{Source: source}
if err := st.WithTransaction(ctx, func() error {
return st.DeferPageFTS(ctx, func() error {
if s.Spaces, err = ingestSpaces(ctx, st, db); err != nil {
return err
}
if s.Users, err = ingestUsers(ctx, st, db); err != nil {
return err
}
if s.Teams, err = ingestTeams(ctx, st, db); err != nil {
return err
}
if s.Collections, err = ingestCollections(ctx, st, db); err != nil {
return err
}
if s.Pages, s.Blocks, s.RawRecords, err = ingestBlocks(ctx, st, db); err != nil {
return err
}
if s.Comments, err = ingestComments(ctx, st, db); err != nil {
return err
}
addedSpaces, err := st.EnsureSpaceFallbacks(ctx, SourceName)
if err != nil {
return err
}
s.Spaces += addedSpaces
return nil
})
}); err != nil {
if s.Spaces, err = ingestSpaces(ctx, st, db); err != nil {
return s, err
}
if s.Users, err = ingestUsers(ctx, st, db); err != nil {
return s, err
}
if s.Collections, err = ingestCollections(ctx, st, db); err != nil {
return s, err
}
if s.Pages, s.Blocks, s.RawRecords, err = ingestBlocks(ctx, st, db); err != nil {
return s, err
}
if s.Comments, err = ingestComments(ctx, st, db); err != nil {
return s, err
}
if err := st.SetSyncState(ctx, SourceName, "desktop", "notion.db", snapshot); err != nil {
@ -129,65 +111,9 @@ func snapshotDB(path, cacheDir string) (string, error) {
return "", err
}
}
if err := pruneDesktopSnapshots(cacheDir, desktopSnapshotRetention, outPath); err != nil {
return "", err
}
return outPath, nil
}
// desktopSnapshot pairs a cached snapshot file with its modification time.
type desktopSnapshot struct {
	path    string
	modTime time.Time
}

// pruneDesktopSnapshots deletes old snapshot databases from cacheDir.
//
// Only regular entries named "notion-desktop-*.db" are considered. The file
// named by current (when non-empty) is always retained; the newest snapshots
// by modification time (ties broken by descending path) are then retained
// until keep files are kept in total. keep values below 1 are treated as 1.
// Each pruned snapshot is removed together with its "-wal" and "-shm"
// companions; files that are already missing are not an error.
func pruneDesktopSnapshots(cacheDir string, keep int, current string) error {
	if keep < 1 {
		keep = 1
	}
	entries, err := os.ReadDir(cacheDir)
	if err != nil {
		return err
	}

	var found []desktopSnapshot
	for _, e := range entries {
		name := e.Name()
		matches := !e.IsDir() && strings.HasPrefix(name, "notion-desktop-") && strings.HasSuffix(name, ".db")
		if !matches {
			continue
		}
		info, err := e.Info()
		if err != nil {
			return err
		}
		found = append(found, desktopSnapshot{path: filepath.Join(cacheDir, name), modTime: info.ModTime()})
	}

	// Newest first; equal timestamps fall back to descending path order.
	sort.SliceStable(found, func(a, b int) bool {
		if !found[a].modTime.Equal(found[b].modTime) {
			return found[a].modTime.After(found[b].modTime)
		}
		return found[a].path > found[b].path
	})

	retained := map[string]bool{}
	if current != "" {
		retained[filepath.Clean(current)] = true
	}
	for _, snap := range found {
		if len(retained) >= keep {
			break
		}
		retained[filepath.Clean(snap.path)] = true
	}

	for _, snap := range found {
		base := filepath.Clean(snap.path)
		if retained[base] {
			continue
		}
		for _, suffix := range []string{"", "-wal", "-shm"} {
			if err := os.Remove(base + suffix); err != nil && !os.IsNotExist(err) {
				return err
			}
		}
	}
	return nil
}
func copyFile(src, dst string, perm os.FileMode) error {
in, err := os.Open(src)
if err != nil {
@ -250,38 +176,9 @@ func ingestUsers(ctx context.Context, st *store.Store, db *sql.DB) (int, error)
return n, rows.Err()
}
// ingestTeams copies rows from the source database's team table into st and
// returns how many were upserted. Rows with a non-zero archived_at are
// skipped by the query. A team with an empty name is stored under its ID.
// A query failure is passed through ignoreMissingTable before being returned
// (presumably so a source without a team table is not an error — confirm).
func ingestTeams(ctx context.Context, st *store.Store, db *sql.DB) (int, error) {
	const query = `select id, space_id, parent_id, parent_table, coalesce(name, ''),
coalesce(json_object('id', id, 'space_id', space_id, 'parent_id', parent_id, 'parent_table', parent_table,
'name', name, 'description', description, 'team_pages', team_pages, 'settings', settings), '{}')
from team where coalesce(archived_at, 0) = 0`
	rows, err := db.QueryContext(ctx, query)
	if err != nil {
		return 0, ignoreMissingTable(err)
	}
	defer rows.Close()

	count := 0
	for rows.Next() {
		var team store.Team
		err := rows.Scan(&team.ID, &team.SpaceID, &team.ParentID, &team.ParentTable, &team.Name, &team.RawJSON)
		if err != nil {
			return count, err
		}
		if team.Name == "" {
			team.Name = team.ID
		}
		team.Source = SourceName
		team.SyncedAt = store.NowMS()
		if err := st.UpsertTeam(ctx, team); err != nil {
			return count, err
		}
		count++
	}
	return count, rows.Err()
}
func ingestCollections(ctx context.Context, st *store.Store, db *sql.DB) (int, error) {
rows, err := db.QueryContext(ctx, `select id, space_id, parent_id, parent_table, coalesce(name, ''), coalesce(schema, ''), coalesce(format, ''),
coalesce(json_object('id', id, 'space_id', space_id, 'parent_id', parent_id, 'parent_table', parent_table,
'name', name, 'schema', schema, 'format', format), '{}')
rows, err := db.QueryContext(ctx, `select id, space_id, parent_id, coalesce(name, ''), coalesce(schema, ''), coalesce(format, ''),
coalesce(json_object('id', id, 'space_id', space_id, 'parent_id', parent_id, 'name', name, 'schema', schema, 'format', format), '{}')
from collection where alive = 1`)
if err != nil {
return 0, ignoreMissingTable(err)
@ -290,7 +187,7 @@ func ingestCollections(ctx context.Context, st *store.Store, db *sql.DB) (int, e
n := 0
for rows.Next() {
var x store.Collection
if err := rows.Scan(&x.ID, &x.SpaceID, &x.ParentID, &x.ParentTable, &x.Name, &x.SchemaJSON, &x.FormatJSON, &x.RawJSON); err != nil {
if err := rows.Scan(&x.ID, &x.SpaceID, &x.ParentID, &x.Name, &x.SchemaJSON, &x.FormatJSON, &x.RawJSON); err != nil {
return n, err
}
x.Name = notiontext.TitleFromProperties(x.Name)
@ -321,7 +218,6 @@ type localBlock struct {
Alive bool
FormatJSON string
RawJSON string
Text string
}
func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int, blocks int, rawRecords int, err error) {
@ -346,7 +242,6 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return pages, blocks, rawRecords, err
}
b.Alive = alive != 0
b.Text = blockText(b.PropertiesJSON)
byID[b.ID] = b
all = append(all, b)
}
@ -373,8 +268,11 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return ""
}
pageFor = func(id string) string { return resolve(id, map[string]bool{}) }
children := childBlocksByParent(all)
for _, b := range all {
title := notiontext.TitleFromProperties(b.PropertiesJSON)
if title == "" && isPageType(b.Type) {
title = "Untitled"
}
if isPageType(b.Type) {
if err := st.UpsertPage(ctx, store.Page{
ID: b.ID,
@ -382,7 +280,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
ParentID: b.ParentID,
ParentTable: b.ParentTable,
CollectionID: b.CollectionID,
Title: pageTitle(b, children),
Title: title,
PropertiesJSON: b.PropertiesJSON,
CreatedTime: b.CreatedTime,
LastEditedTime: b.LastEditedTime,
@ -396,6 +294,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
pages++
}
pageID := pageFor(b.ID)
text := notiontext.PlainFromJSON(b.PropertiesJSON)
if err := st.UpsertBlock(ctx, store.Block{
ID: b.ID,
PageID: pageID,
@ -403,7 +302,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
ParentID: b.ParentID,
ParentTable: b.ParentTable,
Type: b.Type,
Text: b.Text,
Text: text,
PropertiesJSON: b.PropertiesJSON,
ContentJSON: b.ContentJSON,
FormatJSON: b.FormatJSON,
@ -428,73 +327,6 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return pages, blocks, rawRecords, nil
}
// childBlocksByParent groups the alive blocks that have a parent by that
// parent's ID. Each group is ordered by CreatedTime ascending, with ID as the
// tie-breaker.
func childBlocksByParent(blocks []localBlock) map[string][]localBlock {
	byParent := make(map[string][]localBlock)
	for _, b := range blocks {
		if b.Alive && b.ParentID != "" {
			byParent[b.ParentID] = append(byParent[b.ParentID], b)
		}
	}
	for _, siblings := range byParent {
		// siblings shares its backing array with the map entry, so sorting it
		// here orders the stored group.
		sort.SliceStable(siblings, func(i, j int) bool {
			if siblings[i].CreatedTime != siblings[j].CreatedTime {
				return siblings[i].CreatedTime < siblings[j].CreatedTime
			}
			return siblings[i].ID < siblings[j].ID
		})
	}
	return byParent
}
// pageTitle picks a title for page: the title from its own properties if
// present, otherwise a snippet derived from its descendants via
// fallbackPageTitle, otherwise the literal "Untitled".
func pageTitle(page localBlock, children map[string][]localBlock) string {
	title := notiontext.TitleFromProperties(page.PropertiesJSON)
	if title == "" {
		title = fallbackPageTitle(page.ID, children, map[string]bool{})
	}
	if title == "" {
		title = "Untitled"
	}
	return title
}
// fallbackPageTitle searches the children of parentID depth-first, in slice
// order, and returns the first non-empty title snippet found. A child that is
// not a page type contributes its own text first; every child's subtree is
// then searched. seen guards against revisiting a parent, so cyclic parent
// links terminate. Returns "" when nothing usable is found.
func fallbackPageTitle(parentID string, children map[string][]localBlock, seen map[string]bool) string {
	if parentID == "" || seen[parentID] {
		return ""
	}
	seen[parentID] = true
	for _, kid := range children[parentID] {
		candidate := ""
		if !isPageType(kid.Type) {
			candidate = titleSnippet(kid.Text)
		}
		if candidate == "" {
			candidate = fallbackPageTitle(kid.ID, children, seen)
		}
		if candidate != "" {
			return candidate
		}
	}
	return ""
}
// titleSnippet normalizes s with notiontext.Normalize and limits the result
// to its first 96 runes. An empty normalized string yields "".
func titleSnippet(s string) string {
	const maxRunes = 96
	normalized := notiontext.Normalize(s)
	if normalized == "" {
		return ""
	}
	if runes := []rune(normalized); len(runes) > maxRunes {
		return string(runes[:maxRunes])
	}
	return normalized
}
// blockText extracts display text from a block's raw properties JSON,
// preferring notiontext.TitleFromProperties and falling back to
// notiontext.PlainFromJSON when that yields nothing.
func blockText(raw string) string {
	title := notiontext.TitleFromProperties(raw)
	if title == "" {
		return notiontext.PlainFromJSON(raw)
	}
	return title
}
func ingestComments(ctx context.Context, st *store.Store, db *sql.DB) (int, error) {
rows, err := db.QueryContext(ctx, `select id, parent_id, space_id, coalesce(text, ''), coalesce(created_by_id, ''),
coalesce(cast(created_time as integer), 0), coalesce(cast(last_edited_time as integer), 0), alive,

View File

@ -1,110 +0,0 @@
package notiondesktop
import (
"context"
"database/sql"
"os"
"path/filepath"
"testing"
"time"
"github.com/vincentkoc/notcrawl/internal/store"
_ "modernc.org/sqlite"
)
// TestPruneDesktopSnapshotsKeepsNewestAndSidecars creates three snapshots with
// -wal/-shm sidecars and increasing modification times, prunes down to two,
// and checks that the two newest survive intact while the oldest and its
// sidecars are gone.
func TestPruneDesktopSnapshotsKeepsNewestAndSidecars(t *testing.T) {
	dir := t.TempDir()
	withSidecars := func(name string) []string {
		base := filepath.Join(dir, name)
		return []string{base, base + "-wal", base + "-shm"}
	}

	names := []string{
		"notion-desktop-1000.db",
		"notion-desktop-2000.db",
		"notion-desktop-3000.db",
	}
	for i, name := range names {
		files := withSidecars(name)
		if err := os.WriteFile(files[0], []byte(name), 0o600); err != nil {
			t.Fatal(err)
		}
		for _, suffix := range []string{"-wal", "-shm"} {
			if err := os.WriteFile(files[0]+suffix, []byte(suffix), 0o600); err != nil {
				t.Fatal(err)
			}
		}
		stamp := time.Unix(int64(i+1), 0)
		for _, target := range files {
			if err := os.Chtimes(target, stamp, stamp); err != nil {
				t.Fatal(err)
			}
		}
	}

	current := filepath.Join(dir, "notion-desktop-3000.db")
	if err := pruneDesktopSnapshots(dir, 2, current); err != nil {
		t.Fatal(err)
	}

	for _, name := range names[1:] {
		for _, target := range withSidecars(name) {
			if _, err := os.Stat(target); err != nil {
				t.Fatalf("expected %s to remain: %v", target, err)
			}
		}
	}
	for _, target := range withSidecars(names[0]) {
		if _, err := os.Stat(target); !os.IsNotExist(err) {
			t.Fatalf("expected %s to be pruned, got %v", target, err)
		}
	}
}
// TestIngestBlocksDerivesUntitledPageFromChildText builds a source database
// holding a page with empty properties and one text child, ingests it, and
// checks that the stored page title is taken from the child's text.
func TestIngestBlocksDerivesUntitledPageFromChildText(t *testing.T) {
	const createBlockTable = `create table block (
id text primary key,
space_id text,
type text,
properties text,
content text,
collection_id text,
created_time integer,
last_edited_time integer,
parent_id text,
parent_table text,
alive integer,
format text
)`
	const insertBlocks = `insert into block(id, space_id, type, properties, content, collection_id, created_time, last_edited_time, parent_id, parent_table, alive, format)
values
('page1', 'space1', 'page', '{}', '', '', 1, 1, '', '', 1, ''),
('child1', 'space1', 'text', '{"title":[["Decision log"]]}', '', '', 2, 2, 'page1', 'block', 1, '')`

	ctx := context.Background()
	src, err := sql.Open("sqlite", filepath.Join(t.TempDir(), "desktop.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer src.Close()
	for _, stmt := range []string{createBlockTable, insertBlocks} {
		if _, err := src.ExecContext(ctx, stmt); err != nil {
			t.Fatal(err)
		}
	}

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	if _, _, _, err := ingestBlocks(ctx, st, src); err != nil {
		t.Fatal(err)
	}

	var got string
	row := st.DB().QueryRowContext(ctx, `select title from pages where id = 'page1'`)
	if err := row.Scan(&got); err != nil {
		t.Fatal(err)
	}
	if got != "Decision log" {
		t.Fatalf("expected child text title, got %q", got)
	}
}

View File

@ -6,7 +6,6 @@ import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"io"
"os"
@ -14,7 +13,6 @@ import (
"path/filepath"
"sort"
"strings"
"syscall"
"time"
"github.com/vincentkoc/notcrawl/internal/store"
@ -23,7 +21,6 @@ import (
var exportTables = []string{
"spaces",
"users",
"teams",
"pages",
"blocks",
"collections",
@ -72,42 +69,28 @@ func Publish(ctx context.Context, st *store.Store, opts PublishOptions) (Publish
if err := ensureRepo(ctx, opts.RepoPath, opts.Remote, opts.Branch); err != nil {
return PublishSummary{}, err
}
dataRoot := filepath.Join(opts.RepoPath, "data")
pagesRoot := filepath.Join(opts.RepoPath, "pages")
if err := os.MkdirAll(dataRoot, 0o755); err != nil {
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "data")); err != nil {
return PublishSummary{}, err
}
if err := os.MkdirAll(pagesRoot, 0o755); err != nil {
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "pages")); err != nil {
return PublishSummary{}, err
}
if err := os.MkdirAll(filepath.Join(opts.RepoPath, "data"), 0o755); err != nil {
return PublishSummary{}, err
}
manifest := Manifest{GeneratedAt: time.Now().UTC().Format(time.RFC3339)}
dataKeep := map[string]bool{}
for _, table := range exportTables {
tm, err := exportTable(ctx, st.DB(), opts.RepoPath, table)
if err != nil {
return PublishSummary{}, err
}
manifest.Tables = append(manifest.Tables, tm)
dataKeep[filepath.Clean(filepath.Join(opts.RepoPath, tm.Path))] = true
}
if err := pruneGeneratedFiles(dataRoot, dataKeep, func(path string) bool {
return strings.HasSuffix(path, ".jsonl.gz")
}); err != nil {
return PublishSummary{}, err
}
pagesKeep := map[string]bool{}
if opts.MarkdownDir != "" {
var err error
pagesKeep, err = copyDir(opts.MarkdownDir, pagesRoot)
if err != nil && !os.IsNotExist(err) {
if err := copyDir(opts.MarkdownDir, filepath.Join(opts.RepoPath, "pages")); err != nil && !os.IsNotExist(err) {
return PublishSummary{}, err
}
}
if err := pruneGeneratedFiles(pagesRoot, pagesKeep, func(path string) bool {
return strings.HasSuffix(path, ".md")
}); err != nil {
return PublishSummary{}, err
}
b, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return PublishSummary{}, err
@ -333,16 +316,15 @@ func run(ctx context.Context, dir, name string, args ...string) error {
return nil
}
func copyDir(src, dst string) (map[string]bool, error) {
func copyDir(src, dst string) error {
info, err := os.Stat(src)
if err != nil {
return nil, err
return err
}
if !info.IsDir() {
return nil, fmt.Errorf("not a directory: %s", src)
return fmt.Errorf("not a directory: %s", src)
}
keep := map[string]bool{}
err = filepath.WalkDir(src, func(path string, d os.DirEntry, err error) error {
return filepath.WalkDir(src, func(path string, d os.DirEntry, err error) error {
if err != nil {
return err
}
@ -367,51 +349,9 @@ func copyDir(src, dst string) (map[string]bool, error) {
return err
}
defer out.Close()
if _, err := io.Copy(out, in); err != nil {
return err
}
keep[filepath.Clean(target)] = true
return nil
})
return keep, err
}
func pruneGeneratedFiles(root string, keep map[string]bool, shouldPrune func(string) bool) error {
if _, err := os.Stat(root); err != nil {
if os.IsNotExist(err) {
return nil
}
_, err = io.Copy(out, in)
return err
}
var dirs []string
if err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
if err != nil {
return err
}
if path == root {
return nil
}
if d.IsDir() {
dirs = append(dirs, path)
return nil
}
clean := filepath.Clean(path)
if shouldPrune(clean) && !keep[clean] {
return os.Remove(clean)
}
return nil
}); err != nil {
return err
}
sort.Slice(dirs, func(i, j int) bool {
return len(dirs[i]) > len(dirs[j])
})
for _, dir := range dirs {
if err := os.Remove(dir); err != nil && !os.IsNotExist(err) && !errors.Is(err, syscall.ENOTEMPTY) && !errors.Is(err, syscall.EEXIST) {
return err
}
}
return nil
}
func exportValue(v any) any {

View File

@ -39,35 +39,6 @@ func TestPublishAndImportSnapshot(t *testing.T) {
if _, err := os.Stat(filepath.Join(repo, "pages", "default", "launch-page1.md")); err != nil {
t.Fatal(err)
}
stalePage := filepath.Join(repo, "pages", "default", "stale.md")
if err := os.WriteFile(stalePage, []byte("stale"), 0o644); err != nil {
t.Fatal(err)
}
pageSidecar := filepath.Join(repo, "pages", "default", "README.txt")
if err := os.WriteFile(pageSidecar, []byte("keep"), 0o644); err != nil {
t.Fatal(err)
}
staleData := filepath.Join(repo, "data", "stale.jsonl.gz")
if err := os.WriteFile(staleData, []byte("stale"), 0o644); err != nil {
t.Fatal(err)
}
dataSidecar := filepath.Join(repo, "data", "README.txt")
if err := os.WriteFile(dataSidecar, []byte("keep"), 0o644); err != nil {
t.Fatal(err)
}
if _, err := Publish(ctx, src, PublishOptions{RepoPath: repo, MarkdownDir: mdDir}); err != nil {
t.Fatal(err)
}
for _, path := range []string{stalePage, staleData} {
if _, err := os.Stat(path); !os.IsNotExist(err) {
t.Fatalf("expected generated stale file %s to be pruned, got %v", path, err)
}
}
for _, path := range []string{pageSidecar, dataSidecar} {
if _, err := os.Stat(path); err != nil {
t.Fatalf("expected sidecar %s to remain: %v", path, err)
}
}
dst, err := store.Open(filepath.Join(t.TempDir(), "dst.db"))
if err != nil {
t.Fatal(err)

View File

@ -3,11 +3,10 @@ package store
import (
"context"
"database/sql"
"strings"
)
func (s *Store) Pages(ctx context.Context) ([]Page, error) {
rows, err := s.queryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
rows, err := s.db.QueryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
properties_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from pages where alive = 1 order by coalesce(last_edited_time, 0) desc, title`)
if err != nil {
@ -29,7 +28,7 @@ func (s *Store) Pages(ctx context.Context) ([]Page, error) {
}
func (s *Store) Collections(ctx context.Context) ([]Collection, error) {
rows, err := s.queryContext(ctx, `select id, space_id, parent_id, parent_table, name, schema_json, format_json, raw_json, source, synced_at
rows, err := s.db.QueryContext(ctx, `select id, space_id, parent_id, name, schema_json, format_json, raw_json, source, synced_at
from collections order by lower(coalesce(name, id)), id`)
if err != nil {
return nil, err
@ -38,7 +37,7 @@ func (s *Store) Collections(ctx context.Context) ([]Collection, error) {
var collections []Collection
for rows.Next() {
var c Collection
if err := rows.Scan(&c.ID, &c.SpaceID, &c.ParentID, &c.ParentTable, &c.Name, &c.SchemaJSON, &c.FormatJSON, &c.RawJSON, &c.Source, &c.SyncedAt); err != nil {
if err := rows.Scan(&c.ID, &c.SpaceID, &c.ParentID, &c.Name, &c.SchemaJSON, &c.FormatJSON, &c.RawJSON, &c.Source, &c.SyncedAt); err != nil {
return nil, err
}
collections = append(collections, c)
@ -48,13 +47,13 @@ func (s *Store) Collections(ctx context.Context) ([]Collection, error) {
func (s *Store) Collection(ctx context.Context, id string) (Collection, error) {
var c Collection
err := s.queryRowContext(ctx, `select id, space_id, parent_id, parent_table, name, schema_json, format_json, raw_json, source, synced_at
from collections where id = ?`, id).Scan(&c.ID, &c.SpaceID, &c.ParentID, &c.ParentTable, &c.Name, &c.SchemaJSON, &c.FormatJSON, &c.RawJSON, &c.Source, &c.SyncedAt)
err := s.db.QueryRowContext(ctx, `select id, space_id, parent_id, name, schema_json, format_json, raw_json, source, synced_at
from collections where id = ?`, id).Scan(&c.ID, &c.SpaceID, &c.ParentID, &c.Name, &c.SchemaJSON, &c.FormatJSON, &c.RawJSON, &c.Source, &c.SyncedAt)
return c, err
}
func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Page, error) {
rows, err := s.queryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
rows, err := s.db.QueryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
properties_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from pages where collection_id = ? and alive = 1 order by coalesce(last_edited_time, 0) desc, title`, collectionID)
if err != nil {
@ -76,7 +75,7 @@ func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Pag
}
func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error) {
rows, err := s.queryContext(ctx, `select id, page_id, space_id, parent_id, parent_table, type, text, properties_json,
rows, err := s.db.QueryContext(ctx, `select id, page_id, space_id, parent_id, parent_table, type, text, properties_json,
content_json, format_json, display_order, created_time, last_edited_time, alive, source, raw_json, synced_at
from blocks where page_id = ? and alive = 1 order by parent_id, display_order, created_time, id`, pageID)
if err != nil {
@ -98,7 +97,7 @@ func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error)
}
func (s *Store) PageComments(ctx context.Context, pageID string) ([]Comment, error) {
rows, err := s.queryContext(ctx, `select id, page_id, space_id, parent_id, text, created_by_id,
rows, err := s.db.QueryContext(ctx, `select id, page_id, space_id, parent_id, text, created_by_id,
created_time, last_edited_time, alive, raw_json, source, synced_at
from comments where page_id = ? and alive = 1 order by created_time, id`, pageID)
if err != nil {
@ -119,165 +118,20 @@ func (s *Store) PageComments(ctx context.Context, pageID string) ([]Comment, err
return comments, rows.Err()
}
// SpaceNames loads every row of the spaces table and returns a map from
// space id to space name.
//
// NOTE(review): name is scanned into a plain string, so a NULL name would make
// Scan fail; SpaceName reads the same column through sql.NullString — confirm
// whether spaces.name can be NULL.
func (s *Store) SpaceNames(ctx context.Context) (map[string]string, error) {
	rows, err := s.queryContext(ctx, `select id, name from spaces`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	names := make(map[string]string)
	for rows.Next() {
		var (
			spaceID   string
			spaceName string
		)
		if scanErr := rows.Scan(&spaceID, &spaceName); scanErr != nil {
			return nil, scanErr
		}
		names[spaceID] = spaceName
	}
	// Iteration errors are reported alongside whatever was collected so far.
	return names, rows.Err()
}
// TeamNames loads every row of the teams table and returns a map from team id
// to team name.
func (s *Store) TeamNames(ctx context.Context) (map[string]string, error) {
	rows, err := s.queryContext(ctx, `select id, name from teams`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	names := make(map[string]string)
	for rows.Next() {
		var (
			teamID   string
			teamName string
		)
		if scanErr := rows.Scan(&teamID, &teamName); scanErr != nil {
			return nil, scanErr
		}
		names[teamID] = teamName
	}
	// Iteration errors are reported alongside whatever was collected so far.
	return names, rows.Err()
}
// BlockParents returns, for every block row, a ParentRef holding the block's
// parent id and parent table, keyed by block id. NULL parent columns become
// empty strings in the ParentRef.
func (s *Store) BlockParents(ctx context.Context) (map[string]ParentRef, error) {
	rows, err := s.queryContext(ctx, `select id, parent_id, parent_table from blocks`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	parents := make(map[string]ParentRef)
	for rows.Next() {
		var (
			blockID string
			pid     sql.NullString
			ptable  sql.NullString
		)
		if scanErr := rows.Scan(&blockID, &pid, &ptable); scanErr != nil {
			return nil, scanErr
		}
		ref := ParentRef{ID: pid.String, Table: ptable.String}
		parents[blockID] = ref
	}
	return parents, rows.Err()
}
// CollectionParents returns, for every collection row, a ParentRef holding the
// collection's parent id and parent table, keyed by collection id. NULL parent
// columns become empty strings in the ParentRef.
func (s *Store) CollectionParents(ctx context.Context) (map[string]ParentRef, error) {
	rows, err := s.queryContext(ctx, `select id, parent_id, parent_table from collections`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	parents := make(map[string]ParentRef)
	for rows.Next() {
		var (
			collectionID string
			pid          sql.NullString
			ptable       sql.NullString
		)
		if scanErr := rows.Scan(&collectionID, &pid, &ptable); scanErr != nil {
			return nil, scanErr
		}
		ref := ParentRef{ID: pid.String, Table: ptable.String}
		parents[collectionID] = ref
	}
	return parents, rows.Err()
}
func (s *Store) SpaceName(ctx context.Context, id string) (string, error) {
if id == "" {
return "default", nil
}
var name sql.NullString
err := s.queryRowContext(ctx, `select name from spaces where id = ?`, id).Scan(&name)
err := s.db.QueryRowContext(ctx, `select name from spaces where id = ?`, id).Scan(&name)
if err != nil {
if err == sql.ErrNoRows {
return fallbackSpaceName(id), nil
return id, nil
}
return "", err
}
if name.Valid && name.String != "" {
return name.String, nil
}
return fallbackSpaceName(id), nil
}
// TeamName returns the display name for the team with the given id.
//
// An empty id yields an empty name. When the team row is missing, or its name
// is NULL or empty, the result is "team-" followed by shortID(id). Any other
// query error is returned unchanged.
func (s *Store) TeamName(ctx context.Context, id string) (string, error) {
	if id == "" {
		return "", nil
	}
	var stored sql.NullString
	row := s.queryRowContext(ctx, `select name from teams where id = ?`, id)
	switch err := row.Scan(&stored); {
	case err == sql.ErrNoRows:
		return "team-" + shortID(id), nil
	case err != nil:
		return "", err
	case stored.Valid && stored.String != "":
		return stored.String, nil
	default:
		return "team-" + shortID(id), nil
	}
}
// PageTeamID resolves the id of the team that owns page by handing the page's
// parent reference (and its collection id) to resolveTeamID. It returns an
// empty id when resolveTeamID finds no team.
func (s *Store) PageTeamID(ctx context.Context, page Page) (string, error) {
	// The visited set is seeded with the bare page id; resolveTeamID itself
	// records entries as "table:id".
	visited := make(map[string]bool)
	visited[page.ID] = true
	return s.resolveTeamID(ctx, page.ParentTable, page.ParentID, page.CollectionID, visited)
}
// resolveTeamID climbs the parent chain starting at (table, id) until it
// reaches a parent whose table is "team", and returns that parent's id.
//
// collectionID is only used for the starting node: it substitutes for an empty
// id when table is "collection". seen records visited "table:id" keys so a
// cyclic parent chain terminates. The walk gives up with an empty id and a nil
// error when the id is empty, a node repeats, a row is missing, or the table
// is not one of block / collection / database / data_source.
func (s *Store) resolveTeamID(ctx context.Context, table, id, collectionID string, seen map[string]bool) (string, error) {
	for {
		if table == "team" {
			return id, nil
		}
		if id == "" && table == "collection" {
			id = collectionID
		}
		if id == "" {
			return "", nil
		}
		key := table + ":" + id
		if seen[key] {
			return "", nil
		}
		seen[key] = true

		// Pick the table that stores this node's parent pointer.
		var lookup string
		switch table {
		case "block":
			lookup = `select parent_id, parent_table from blocks where id = ?`
		case "collection", "database", "data_source":
			lookup = `select parent_id, parent_table from collections where id = ?`
		default:
			return "", nil
		}

		var nextID, nextTable sql.NullString
		if err := s.queryRowContext(ctx, lookup, id).Scan(&nextID, &nextTable); err != nil {
			if err == sql.ErrNoRows {
				return "", nil
			}
			return "", err
		}
		// Step up one level; the collection fallback applies to the first node only.
		table, id, collectionID = nextTable.String, nextID.String, ""
	}
}
// shortID abbreviates an identifier for use in generated names.
//
// Dashes are stripped first. If nothing remains the result is "unknown"; if
// more than 16 bytes remain the result is the first 8 and last 8 bytes joined
// by a dash; otherwise the dash-free id is returned as is.
func shortID(id string) string {
	compact := strings.Join(strings.Split(id, "-"), "")
	switch n := len(compact); {
	case n == 0:
		return "unknown"
	case n > 16:
		return compact[:8] + "-" + compact[n-8:]
	default:
		return compact
	}
}
func fallbackSpaceName(id string) string {
return "External Space " + shortID(id)
return id, nil
}

View File

@ -17,11 +17,8 @@ import (
const schemaVersion = 1
type Store struct {
db *sql.DB
tx *sql.Tx
path string
deferredFTS int
deferredFTSPages map[string]bool
db *sql.DB
path string
}
func Open(path string) (*Store, error) {
@ -89,45 +86,6 @@ func (s *Store) DB() *sql.DB {
return s.db
}
// execContext runs a statement that returns no rows. It goes through the
// store's active transaction when s.tx is set and through s.db otherwise.
func (s *Store) execContext(ctx context.Context, query string, args ...any) (sql.Result, error) {
	if s.tx == nil {
		return s.db.ExecContext(ctx, query, args...)
	}
	return s.tx.ExecContext(ctx, query, args...)
}
// queryContext runs a row-returning query. It goes through the store's active
// transaction when s.tx is set and through s.db otherwise.
func (s *Store) queryContext(ctx context.Context, query string, args ...any) (*sql.Rows, error) {
	if s.tx == nil {
		return s.db.QueryContext(ctx, query, args...)
	}
	return s.tx.QueryContext(ctx, query, args...)
}
// queryRowContext runs a query expected to return at most one row. It goes
// through the store's active transaction when s.tx is set and through s.db
// otherwise.
func (s *Store) queryRowContext(ctx context.Context, query string, args ...any) *sql.Row {
	if s.tx == nil {
		return s.db.QueryRowContext(ctx, query, args...)
	}
	return s.tx.QueryRowContext(ctx, query, args...)
}
// WithTransaction runs fn inside a database transaction.
//
// While fn runs, s.tx is set so that execContext, queryContext and
// queryRowContext route through the transaction. If a transaction is already
// active, fn simply joins it and the outermost call decides the outcome. The
// transaction is committed when fn returns nil and rolled back when fn returns
// an error; the returned error is fn's error, or the error from BeginTx or
// Commit.
//
// If fn panics, the transaction is rolled back and s.tx is cleared before the
// panic continues to propagate, so the Store is not left pointing at a dead
// transaction.
//
// NOTE(review): s.tx is a plain field with no visible locking; presumably a
// Store must not be used from several goroutines while a transaction is open —
// confirm.
func (s *Store) WithTransaction(ctx context.Context, fn func() error) error {
	if s.tx != nil {
		return fn()
	}
	tx, err := s.db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	s.tx = tx
	returned := false
	defer func() {
		// Only reached with returned == false when fn panicked: undo the
		// transaction and detach it so later calls use s.db again.
		if !returned {
			s.tx = nil
			_ = tx.Rollback()
		}
	}()
	err = fn()
	s.tx = nil
	returned = true
	if err != nil {
		// The rollback error is ignored on purpose; fn's error is the one
		// the caller needs to see.
		_ = tx.Rollback()
		return err
	}
	return tx.Commit()
}
func (s *Store) Close() error {
if s == nil || s.db == nil {
return nil
@ -141,7 +99,6 @@ type Status struct {
WALBytes int64 `json:"wal_bytes"`
Spaces int `json:"spaces"`
Users int `json:"users"`
Teams int `json:"teams"`
Pages int `json:"pages"`
Blocks int `json:"blocks"`
Collections int `json:"collections"`
@ -184,17 +141,6 @@ func (s *Store) init(ctx context.Context) error {
source text not null,
synced_at integer not null
)`,
`create table if not exists teams (
id text primary key,
space_id text,
parent_id text,
parent_table text,
name text not null,
raw_json text,
source text not null,
synced_at integer not null
)`,
`create index if not exists teams_space_id on teams(space_id)`,
`create table if not exists pages (
id text primary key,
space_id text,
@ -242,7 +188,6 @@ func (s *Store) init(ctx context.Context) error {
id text primary key,
space_id text,
parent_id text,
parent_table text,
name text,
schema_json text,
format_json text,
@ -292,12 +237,12 @@ func (s *Store) init(ctx context.Context) error {
`create virtual table if not exists comment_fts using fts5(comment_id unindexed, page_id unindexed, body)`,
}
for _, stmt := range stmts {
if _, err := s.execContext(ctx, stmt); err != nil {
if _, err := s.db.ExecContext(ctx, stmt); err != nil {
return err
}
}
var current int
row := s.queryRowContext(ctx, `select value from meta where key = 'schema_version'`)
row := s.db.QueryRowContext(ctx, `select value from meta where key = 'schema_version'`)
err := row.Scan(&current)
if err != nil && !errors.Is(err, sql.ErrNoRows) {
return err
@ -308,23 +253,20 @@ func (s *Store) init(ctx context.Context) error {
if err := s.ensureColumn(ctx, "blocks", "display_order", "integer not null default 0"); err != nil {
return err
}
if err := s.ensureColumn(ctx, "collections", "parent_table", "text"); err != nil {
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_order on blocks(page_id, alive, parent_id, display_order, created_time, id)`); err != nil {
return err
}
if _, err := s.execContext(ctx, `create index if not exists blocks_page_alive_order on blocks(page_id, alive, parent_id, display_order, created_time, id)`); err != nil {
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`); err != nil {
return err
}
if _, err := s.execContext(ctx, `create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`); err != nil {
return err
}
if _, err := s.execContext(ctx, `insert or replace into meta(key, value) values('schema_version', ?)`, schemaVersion); err != nil {
if _, err := s.db.ExecContext(ctx, `insert or replace into meta(key, value) values('schema_version', ?)`, schemaVersion); err != nil {
return err
}
return nil
}
func (s *Store) ensureColumn(ctx context.Context, table, column, definition string) error {
rows, err := s.queryContext(ctx, `pragma table_info(`+table+`)`)
rows, err := s.db.QueryContext(ctx, `pragma table_info(`+table+`)`)
if err != nil {
return err
}
@ -345,7 +287,7 @@ func (s *Store) ensureColumn(ctx context.Context, table, column, definition stri
if err := rows.Err(); err != nil {
return err
}
_, err = s.execContext(ctx, `alter table `+table+` add column `+column+` `+definition)
_, err = s.db.ExecContext(ctx, `alter table `+table+` add column `+column+` `+definition)
return err
}
@ -365,79 +307,23 @@ func IntBool(v int) bool {
}
func (s *Store) UpsertSpace(ctx context.Context, x Space) error {
_, err := s.execContext(ctx, `insert into spaces(id, name, raw_json, source, synced_at)
_, err := s.db.ExecContext(ctx, `insert into spaces(id, name, raw_json, source, synced_at)
values (?, ?, ?, ?, ?)
on conflict(id) do update set name=excluded.name, raw_json=excluded.raw_json, source=excluded.source, synced_at=excluded.synced_at`,
x.ID, x.Name, x.RawJSON, x.Source, x.SyncedAt)
return err
}
// EnsureSpaceFallbacks inserts a placeholder spaces row for every non-empty
// space_id that is referenced by pages, blocks, teams, collections, comments
// or raw_records but has no row in spaces.
//
// Each placeholder is named with fallbackSpaceName, carries a small JSON
// payload marking it as inferred, and is attributed to source. The return
// value is the number of placeholder rows written.
func (s *Store) EnsureSpaceFallbacks(ctx context.Context, source string) (int, error) {
	rows, err := s.queryContext(ctx, `select distinct space_id from (
select space_id from pages
union all select space_id from blocks
union all select space_id from teams
union all select space_id from collections
union all select space_id from comments
union all select space_id from raw_records
)
where coalesce(space_id, '') <> ''
and space_id not in (select id from spaces)`)
	if err != nil {
		return 0, err
	}
	defer rows.Close()

	// Collect all ids first; the upserts only start once the scan is finished.
	var missing []string
	for rows.Next() {
		var spaceID string
		if scanErr := rows.Scan(&spaceID); scanErr != nil {
			return 0, scanErr
		}
		missing = append(missing, spaceID)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return 0, iterErr
	}

	stamp := NowMS()
	for i := range missing {
		spaceID := missing[i]
		placeholder := Space{
			ID:       spaceID,
			Name:     fallbackSpaceName(spaceID),
			RawJSON:  fmt.Sprintf(`{"id":%q,"inferred":true}`, spaceID),
			Source:   source,
			SyncedAt: stamp,
		}
		if upsertErr := s.UpsertSpace(ctx, placeholder); upsertErr != nil {
			return 0, upsertErr
		}
	}
	return len(missing), nil
}
func (s *Store) UpsertUser(ctx context.Context, x User) error {
_, err := s.execContext(ctx, `insert into users(id, name, email, raw_json, source, synced_at)
_, err := s.db.ExecContext(ctx, `insert into users(id, name, email, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?)
on conflict(id) do update set name=excluded.name, email=excluded.email, raw_json=excluded.raw_json, source=excluded.source, synced_at=excluded.synced_at`,
x.ID, x.Name, x.Email, x.RawJSON, x.Source, x.SyncedAt)
return err
}
// UpsertTeam inserts the team row x, or, when a row with the same id already
// exists, overwrites all of its other columns with the values from x.
func (s *Store) UpsertTeam(ctx context.Context, x Team) error {
	params := []any{x.ID, x.SpaceID, x.ParentID, x.ParentTable, x.Name, x.RawJSON, x.Source, x.SyncedAt}
	if _, err := s.execContext(ctx, `insert into teams(id, space_id, parent_id, parent_table, name, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set
space_id=excluded.space_id,
parent_id=excluded.parent_id,
parent_table=excluded.parent_table,
name=excluded.name,
raw_json=excluded.raw_json,
source=excluded.source,
synced_at=excluded.synced_at`, params...); err != nil {
		return err
	}
	return nil
}
func (s *Store) UpsertPage(ctx context.Context, x Page) error {
_, err := s.execContext(ctx, `insert into pages(
_, err := s.db.ExecContext(ctx, `insert into pages(
id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover, properties_json,
created_time, last_edited_time, alive, source, raw_json, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
@ -462,11 +348,11 @@ func (s *Store) UpsertPage(ctx context.Context, x Page) error {
if err != nil {
return err
}
return s.markPageFTS(ctx, x.ID)
return s.refreshPageFTS(ctx, x.ID)
}
func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
_, err := s.execContext(ctx, `insert into blocks(
_, err := s.db.ExecContext(ctx, `insert into blocks(
id, page_id, space_id, parent_id, parent_table, type, text, properties_json, content_json, format_json,
display_order, created_time, last_edited_time, alive, source, raw_json, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
@ -493,23 +379,23 @@ func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
return err
}
if x.PageID != "" {
return s.markPageFTS(ctx, x.PageID)
return s.refreshPageFTS(ctx, x.PageID)
}
return nil
}
func (s *Store) UpsertCollection(ctx context.Context, x Collection) error {
_, err := s.execContext(ctx, `insert into collections(id, space_id, parent_id, parent_table, name, schema_json, format_json, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set space_id=excluded.space_id, parent_id=excluded.parent_id, parent_table=excluded.parent_table, name=excluded.name,
_, err := s.db.ExecContext(ctx, `insert into collections(id, space_id, parent_id, name, schema_json, format_json, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set space_id=excluded.space_id, parent_id=excluded.parent_id, name=excluded.name,
schema_json=excluded.schema_json, format_json=excluded.format_json, raw_json=excluded.raw_json,
source=excluded.source, synced_at=excluded.synced_at`,
x.ID, x.SpaceID, x.ParentID, x.ParentTable, x.Name, x.SchemaJSON, x.FormatJSON, x.RawJSON, x.Source, x.SyncedAt)
x.ID, x.SpaceID, x.ParentID, x.Name, x.SchemaJSON, x.FormatJSON, x.RawJSON, x.Source, x.SyncedAt)
return err
}
func (s *Store) UpsertComment(ctx context.Context, x Comment) error {
_, err := s.execContext(ctx, `insert into comments(id, page_id, space_id, parent_id, text, created_by_id, created_time, last_edited_time, alive, raw_json, source, synced_at)
_, err := s.db.ExecContext(ctx, `insert into comments(id, page_id, space_id, parent_id, text, created_by_id, created_time, last_edited_time, alive, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set page_id=excluded.page_id, space_id=excluded.space_id, parent_id=excluded.parent_id,
text=excluded.text, created_by_id=excluded.created_by_id, created_time=excluded.created_time,
@ -519,16 +405,16 @@ func (s *Store) UpsertComment(ctx context.Context, x Comment) error {
if err != nil {
return err
}
_, err = s.execContext(ctx, `delete from comment_fts where comment_id = ?`, x.ID)
_, err = s.db.ExecContext(ctx, `delete from comment_fts where comment_id = ?`, x.ID)
if err != nil {
return err
}
_, err = s.execContext(ctx, `insert into comment_fts(comment_id, page_id, body) values (?, ?, ?)`, x.ID, x.PageID, x.Text)
_, err = s.db.ExecContext(ctx, `insert into comment_fts(comment_id, page_id, body) values (?, ?, ?)`, x.ID, x.PageID, x.Text)
return err
}
func (s *Store) UpsertRawRecord(ctx context.Context, x RawRecord) error {
_, err := s.execContext(ctx, `insert into raw_records(source, record_table, record_id, parent_id, space_id, raw_json, synced_at)
_, err := s.db.ExecContext(ctx, `insert into raw_records(source, record_table, record_id, parent_id, space_id, raw_json, synced_at)
values (?, ?, ?, ?, ?, ?, ?)
on conflict(source, record_table, record_id) do update set parent_id=excluded.parent_id, space_id=excluded.space_id,
raw_json=excluded.raw_json, synced_at=excluded.synced_at`,
@ -537,54 +423,16 @@ func (s *Store) UpsertRawRecord(ctx context.Context, x RawRecord) error {
}
func (s *Store) SetSyncState(ctx context.Context, source, entityType, entityID, cursor string) error {
_, err := s.execContext(ctx, `insert into sync_state(source, entity_type, entity_id, cursor, synced_at)
_, err := s.db.ExecContext(ctx, `insert into sync_state(source, entity_type, entity_id, cursor, synced_at)
values (?, ?, ?, ?, ?)
on conflict(source, entity_type, entity_id) do update set cursor=excluded.cursor, synced_at=excluded.synced_at`,
source, entityType, entityID, cursor, NowMS())
return err
}
// DeferPageFTS runs fn with page full-text refreshes postponed.
//
// While fn runs, markPageFTS only records which pages changed. When the
// outermost DeferPageFTS call finishes and fn returned nil, each recorded page
// is refreshed once via refreshPageFTS. Nested calls share the outer call's
// pending set and never refresh on their own. If fn fails, the pending set is
// discarded without refreshing and fn's error is returned.
func (s *Store) DeferPageFTS(ctx context.Context, fn func() error) error {
	outermost := s.deferredFTS == 0
	if outermost {
		s.deferredFTSPages = map[string]bool{}
	}

	s.deferredFTS++
	fnErr := fn()
	s.deferredFTS--

	if !outermost {
		return fnErr
	}

	// Detach the pending set before doing anything that can fail.
	pending := s.deferredFTSPages
	s.deferredFTSPages = nil
	if fnErr != nil {
		return fnErr
	}
	for id := range pending {
		if refreshErr := s.refreshPageFTS(ctx, id); refreshErr != nil {
			return refreshErr
		}
	}
	return nil
}
// markPageFTS notes that the full-text entry for pageID is out of date.
//
// An empty pageID is ignored. Outside a DeferPageFTS scope the entry is
// refreshed immediately; inside one the page is only added to the pending set
// and nil is returned.
func (s *Store) markPageFTS(ctx context.Context, pageID string) error {
	switch {
	case pageID == "":
		return nil
	case s.deferredFTS <= 0:
		return s.refreshPageFTS(ctx, pageID)
	}
	if s.deferredFTSPages == nil {
		s.deferredFTSPages = make(map[string]bool)
	}
	s.deferredFTSPages[pageID] = true
	return nil
}
func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
var title string
if err := s.queryRowContext(ctx, `select title from pages where id = ?`, pageID).Scan(&title); err != nil {
if err := s.db.QueryRowContext(ctx, `select title from pages where id = ?`, pageID).Scan(&title); err != nil {
if errors.Is(err, sql.ErrNoRows) {
return nil
}
@ -595,10 +443,10 @@ func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
return err
}
parts := pageBlockTextParts(pageID, blocks)
if _, err := s.execContext(ctx, `delete from page_fts where page_id = ?`, pageID); err != nil {
if _, err := s.db.ExecContext(ctx, `delete from page_fts where page_id = ?`, pageID); err != nil {
return err
}
_, err = s.execContext(ctx, `insert into page_fts(page_id, title, body) values (?, ?, ?)`, pageID, title, strings.Join(parts, "\n"))
_, err = s.db.ExecContext(ctx, `insert into page_fts(page_id, title, body) values (?, ?, ?)`, pageID, title, strings.Join(parts, "\n"))
return err
}
@ -655,30 +503,8 @@ func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult
if limit <= 0 {
limit = 20
}
rows, err := s.queryContext(ctx, `select kind, id, title, text from (
select 'page' as kind,
page_fts.page_id as id,
page_fts.title as title,
snippet(page_fts, 2, '[', ']', '...', 16) as text,
bm25(page_fts) as rank,
coalesce(p.last_edited_time, p.created_time, 0) as edited_at
from page_fts
join pages p on p.id = page_fts.page_id
where page_fts match ?
union all
select 'comment' as kind,
comment_fts.comment_id as id,
coalesce(p.title, '') as title,
snippet(comment_fts, 2, '[', ']', '...', 16) as text,
bm25(comment_fts) as rank,
coalesce(c.last_edited_time, c.created_time, 0) as edited_at
from comment_fts
join comments c on c.id = comment_fts.comment_id
left join pages p on p.id = comment_fts.page_id
where comment_fts match ?
)
order by rank, edited_at desc, kind, lower(title), id
limit ?`, q, q, limit)
rows, err := s.db.QueryContext(ctx, `select 'page', page_id, title, snippet(page_fts, 2, '[', ']', '...', 16)
from page_fts where page_fts match ? limit ?`, q, limit)
if err != nil {
return nil, err
}
@ -695,10 +521,10 @@ func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult
}
func (s *Store) RebuildFTS(ctx context.Context) error {
if _, err := s.execContext(ctx, `delete from page_fts`); err != nil {
if _, err := s.db.ExecContext(ctx, `delete from page_fts`); err != nil {
return err
}
rows, err := s.queryContext(ctx, `select id from pages`)
rows, err := s.db.QueryContext(ctx, `select id from pages`)
if err != nil {
return err
}
@ -719,10 +545,10 @@ func (s *Store) RebuildFTS(ctx context.Context) error {
return err
}
}
if _, err := s.execContext(ctx, `delete from comment_fts`); err != nil {
if _, err := s.db.ExecContext(ctx, `delete from comment_fts`); err != nil {
return err
}
_, err = s.execContext(ctx, `insert into comment_fts(comment_id, page_id, body) select id, page_id, text from comments where alive = 1`)
_, err = s.db.ExecContext(ctx, `insert into comment_fts(comment_id, page_id, body) select id, page_id, text from comments where alive = 1`)
return err
}
@ -734,7 +560,6 @@ func (s *Store) Status(ctx context.Context) (Status, error) {
}{
{`select count(*) from spaces`, &status.Spaces},
{`select count(*) from users`, &status.Users},
{`select count(*) from teams`, &status.Teams},
{`select count(*) from pages`, &status.Pages},
{`select count(*) from blocks`, &status.Blocks},
{`select count(*) from collections`, &status.Collections},
@ -742,11 +567,11 @@ func (s *Store) Status(ctx context.Context) (Status, error) {
{`select count(*) from raw_records`, &status.RawRecords},
}
for _, count := range counts {
if err := s.queryRowContext(ctx, count.query).Scan(count.dest); err != nil {
if err := s.db.QueryRowContext(ctx, count.query).Scan(count.dest); err != nil {
return Status{}, err
}
}
if err := s.queryRowContext(ctx, `select coalesce(max(synced_at), 0) from sync_state`).Scan(&status.LastSyncAt); err != nil {
if err := s.db.QueryRowContext(ctx, `select coalesce(max(synced_at), 0) from sync_state`).Scan(&status.LastSyncAt); err != nil {
return Status{}, err
}
status.DBBytes = fileSize(s.path)
@ -764,12 +589,12 @@ func (s *Store) Optimize(ctx context.Context, vacuum bool) (MaintenanceSummary,
`pragma optimize`,
`analyze`,
} {
if _, err := s.execContext(ctx, stmt); err != nil {
if _, err := s.db.ExecContext(ctx, stmt); err != nil {
return MaintenanceSummary{}, err
}
}
if vacuum {
if _, err := s.execContext(ctx, `vacuum`); err != nil {
if _, err := s.db.ExecContext(ctx, `vacuum`); err != nil {
return MaintenanceSummary{}, err
}
}

View File

@ -2,7 +2,6 @@ package store
import (
"context"
"errors"
"os"
"path/filepath"
"testing"
@ -34,171 +33,6 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) {
}
}
// TestStoreSearchRanksByRelevanceThenRecency stores two pages whose only block
// has the same text and checks that Search lists the more recently edited page
// first.
func TestStoreSearchRanksByRelevanceThenRecency(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()
	fixtures := []Page{
		{ID: "old", Title: "Old", LastEditedTime: now - 1000, Alive: true, Source: "test", SyncedAt: now},
		{ID: "new", Title: "New", LastEditedTime: now, Alive: true, Source: "test", SyncedAt: now},
	}
	for i := range fixtures {
		pg := fixtures[i]
		if err := st.UpsertPage(ctx, pg); err != nil {
			t.Fatal(err)
		}
		// Same body text on both pages so only the edit time can break the tie.
		blk := Block{ID: pg.ID + "-block", PageID: pg.ID, Type: "text", Text: "needle", Alive: true, Source: "test", SyncedAt: now}
		if err := st.UpsertBlock(ctx, blk); err != nil {
			t.Fatal(err)
		}
	}

	results, err := st.Search(ctx, "needle", 10)
	if err != nil {
		t.Fatal(err)
	}
	ordered := len(results) >= 2 && results[0].ID == "new" && results[1].ID == "old"
	if !ordered {
		t.Fatalf("expected newer equal-rank page first, got %+v", results)
	}
}
// TestStoreSearchIncludesComments checks that a term appearing only in a
// comment is found by Search, reported with kind "comment" and titled after
// the page the comment belongs to.
func TestStoreSearchIncludesComments(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()
	page := Page{ID: "page1", Title: "Launch", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}
	comment := Comment{ID: "comment1", PageID: "page1", Text: "needle from a comment", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertComment(ctx, comment); err != nil {
		t.Fatal(err)
	}

	results, err := st.Search(ctx, "needle", 10)
	if err != nil {
		t.Fatal(err)
	}
	matched := len(results) == 1 &&
		results[0].Kind == "comment" &&
		results[0].ID == "comment1" &&
		results[0].Title == "Launch"
	if !matched {
		t.Fatalf("expected comment search result with page title, got %+v", results)
	}
}
// TestStoreDefersPageFTSRefresh checks that writes made inside DeferPageFTS
// are not searchable until the callback returns, and are searchable afterwards.
func TestStoreDefersPageFTSRefresh(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()
	writeAndProbe := func() error {
		page := Page{ID: "page1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}
		if err := st.UpsertPage(ctx, page); err != nil {
			return err
		}
		block := Block{ID: "block1", PageID: "page1", Type: "text", Text: "deferred sqlite refresh", Alive: true, Source: "test", SyncedAt: now}
		if err := st.UpsertBlock(ctx, block); err != nil {
			return err
		}
		// Still inside the deferred scope: the index must not have caught up yet.
		early, err := st.Search(ctx, "sqlite", 10)
		if err != nil {
			return err
		}
		if len(early) != 0 {
			t.Fatalf("expected deferred FTS to stay stale inside callback, got %+v", early)
		}
		return nil
	}
	if err = st.DeferPageFTS(ctx, writeAndProbe); err != nil {
		t.Fatal(err)
	}

	results, err := st.Search(ctx, "sqlite", 10)
	if err != nil {
		t.Fatal(err)
	}
	if !(len(results) == 1 && results[0].ID == "page1") {
		t.Fatalf("expected refreshed FTS after callback, got %+v", results)
	}
}
// TestStoreTransactionCommitsAndRollsBack checks both outcomes of
// WithTransaction: a page written by a successful callback is persisted, and a
// page written by a failing callback is gone while the callback's error is
// passed back to the caller.
func TestStoreTransactionCommitsAndRollsBack(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()

	// Commit path.
	commit := func() error {
		return st.UpsertPage(ctx, Page{ID: "commit", Title: "Commit", Alive: true, Source: "test", SyncedAt: now})
	}
	if err := st.WithTransaction(ctx, commit); err != nil {
		t.Fatal(err)
	}
	var count int
	if err := st.DB().QueryRowContext(ctx, `select count(*) from pages where id = 'commit'`).Scan(&count); err != nil {
		t.Fatal(err)
	}
	if count != 1 {
		t.Fatalf("expected committed page, got %d", count)
	}

	// Rollback path: the write succeeds but the callback then reports failure.
	sentinel := errors.New("rollback")
	abort := func() error {
		if err := st.UpsertPage(ctx, Page{ID: "rollback", Title: "Rollback", Alive: true, Source: "test", SyncedAt: now}); err != nil {
			return err
		}
		return sentinel
	}
	err = st.WithTransaction(ctx, abort)
	if !errors.Is(err, sentinel) {
		t.Fatalf("expected rollback error, got %v", err)
	}
	if err := st.DB().QueryRowContext(ctx, `select count(*) from pages where id = 'rollback'`).Scan(&count); err != nil {
		t.Fatal(err)
	}
	if count != 0 {
		t.Fatalf("expected rolled back page, got %d", count)
	}
}
// TestStoreEnsuresFallbackSpaces checks that EnsureSpaceFallbacks creates a
// placeholder space for a page whose space is unknown, that SpaceName then
// reports the placeholder's name, and that a second run adds nothing.
func TestStoreEnsuresFallbackSpaces(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()
	const spaceID = "52f1c029-1111-2222-3333-ea9259e0"
	orphan := Page{ID: "page1", SpaceID: spaceID, Title: "Loose", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertPage(ctx, orphan); err != nil {
		t.Fatal(err)
	}

	// First pass creates exactly one placeholder.
	added, err := st.EnsureSpaceFallbacks(ctx, "test")
	if err != nil {
		t.Fatal(err)
	}
	if added != 1 {
		t.Fatalf("expected one fallback space, got %d", added)
	}

	name, err := st.SpaceName(ctx, spaceID)
	if err != nil {
		t.Fatal(err)
	}
	if name != "External Space 52f1c029-ea9259e0" {
		t.Fatalf("unexpected fallback space name: %q", name)
	}

	// Second pass finds nothing left to add.
	added, err = st.EnsureSpaceFallbacks(ctx, "test")
	if err != nil {
		t.Fatal(err)
	}
	if added != 0 {
		t.Fatalf("expected fallback insertion to be idempotent, got %d", added)
	}
}
func TestStoreOrdersBlocksByDisplayOrder(t *testing.T) {
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {
@ -260,62 +94,6 @@ func TestStoreBuildsPageFTSInDisplayTreeOrder(t *testing.T) {
}
}
// TestStoreResolvesPageTeamThroughCollectionParent checks that PageTeamID
// returns "team1" for a page whose parent is a collection (ParentTable
// "collection") when that collection's own parent is a team (ParentTable
// "team").
func TestStoreResolvesPageTeamThroughCollectionParent(t *testing.T) {
	store, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	ctx := context.Background()
	syncedAt := NowMS()

	// Fixture chain: team1 <- collection1 <- page1.
	team := Team{ID: "team1", SpaceID: "space1", Name: "Research", Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertTeam(ctx, team); err != nil {
		t.Fatal(err)
	}

	roadmap := Collection{ID: "collection1", SpaceID: "space1", ParentID: "team1", ParentTable: "team", Name: "Roadmap", Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertCollection(ctx, roadmap); err != nil {
		t.Fatal(err)
	}

	row := Page{ID: "page1", SpaceID: "space1", ParentID: "collection1", ParentTable: "collection", CollectionID: "collection1", Title: "Row", Alive: true, Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertPage(ctx, row); err != nil {
		t.Fatal(err)
	}

	resolved, err := store.PageTeamID(ctx, row)
	switch {
	case err != nil:
		t.Fatal(err)
	case resolved != "team1":
		t.Fatalf("expected team1, got %q", resolved)
	}
}
// TestStoreResolvesPageTeamThroughBlockParent checks that PageTeamID returns
// "team1" for a page whose parent is a block (ParentTable "block") when that
// block's own parent is a team (ParentTable "team").
func TestStoreResolvesPageTeamThroughBlockParent(t *testing.T) {
	store, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	ctx := context.Background()
	syncedAt := NowMS()

	// Fixture chain: team1 <- block1 <- page1.
	team := Team{ID: "team1", SpaceID: "space1", Name: "Research", Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertTeam(ctx, team); err != nil {
		t.Fatal(err)
	}

	holder := Block{ID: "block1", SpaceID: "space1", ParentID: "team1", ParentTable: "team", Type: "text", Text: "parent", Alive: true, Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertBlock(ctx, holder); err != nil {
		t.Fatal(err)
	}

	child := Page{ID: "page1", SpaceID: "space1", ParentID: "block1", ParentTable: "block", Title: "Child", Alive: true, Source: "test", SyncedAt: syncedAt}
	if err := store.UpsertPage(ctx, child); err != nil {
		t.Fatal(err)
	}

	resolved, err := store.PageTeamID(ctx, child)
	switch {
	case err != nil:
		t.Fatal(err)
	case resolved != "team1":
		t.Fatalf("expected team1, got %q", resolved)
	}
}
func TestStoreStatusAndOptimize(t *testing.T) {
path := filepath.Join(t.TempDir(), "notcrawl.db")
st, err := Open(path)

View File

@ -17,17 +17,6 @@ type User struct {
SyncedAt int64
}
// Team holds the fields of one team record.
//
// ParentID and ParentTable together name the record's parent; RawJSON and
// Source are carried as plain strings.
// NOTE(review): SyncedAt is presumably a millisecond timestamp (the tests
// fill it from NowMS) — confirm.
type Team struct {
	ID, SpaceID           string
	ParentID, ParentTable string
	Name                  string
	RawJSON               string
	Source                string
	SyncedAt              int64
}
type Page struct {
ID string
SpaceID string
@ -68,16 +57,15 @@ type Block struct {
}
// Collection holds the fields of one collection record.
//
// Each field is declared exactly once: Go rejects a struct type that repeats
// a field name, so a doubled field list cannot compile. ParentTable is kept
// next to ParentID because the store tests construct collections with
// ParentTable set (for example "team").
// NOTE(review): SyncedAt is presumably a millisecond timestamp (the tests
// fill it from NowMS) — confirm.
type Collection struct {
	ID          string
	SpaceID     string
	ParentID    string
	ParentTable string
	Name        string
	SchemaJSON  string
	FormatJSON  string
	RawJSON     string
	Source      string
	SyncedAt    int64
}
type Comment struct {
@ -105,11 +93,6 @@ type RawRecord struct {
SyncedAt int64
}
// ParentRef pairs an ID with a table name.
// NOTE(review): presumably this mirrors the ParentID/ParentTable pair carried
// by the record types — confirm against callers.
type ParentRef struct {
	ID, Table string
}
type SearchResult struct {
Kind string
ID string