fix(api): preserve block display order
Persist Notion API sibling order and use it for page blocks, Markdown export, and page FTS tree text.
This commit is contained in:
parent
47e96fd4c0
commit
25be299fab
@ -126,6 +126,9 @@ func renderBlocks(b *strings.Builder, pageID string, blocks []store.Block) {
|
||||
for parent := range children {
|
||||
sort.SliceStable(children[parent], func(i, j int) bool {
|
||||
a, z := children[parent][i], children[parent][j]
|
||||
if a.DisplayOrder != z.DisplayOrder {
|
||||
return a.DisplayOrder < z.DisplayOrder
|
||||
}
|
||||
if a.CreatedTime == z.CreatedTime {
|
||||
return a.ID < z.ID
|
||||
}
|
||||
|
||||
@ -44,3 +44,37 @@ func TestExporterWritesMarkdown(t *testing.T) {
|
||||
t.Fatalf("unexpected markdown:\n%s", text)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExporterUsesDisplayOrder(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
now := store.NowMS()
|
||||
if err := st.UpsertPage(ctx, store.Page{ID: "page1", Title: "Recipe", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, block := range []store.Block{
|
||||
{ID: "salt", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "salt", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "flour", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "flour", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
} {
|
||||
if err := st.UpsertBlock(ctx, block); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
dir := t.TempDir()
|
||||
s, err := Exporter{Store: st, Dir: dir}.Export(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
b, err := os.ReadFile(s.Files[0])
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
text := string(b)
|
||||
if strings.Index(text, "- flour") > strings.Index(text, "- salt") {
|
||||
t.Fatalf("markdown did not preserve display order:\n%s", text)
|
||||
}
|
||||
}
|
||||
|
||||
@ -332,6 +332,7 @@ func (c Client) usesDataSourceAPI() bool {
|
||||
func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentID, spaceID string) (int, error) {
|
||||
var count int
|
||||
cursor := ""
|
||||
var displayOrder int64
|
||||
for {
|
||||
path := fmt.Sprintf("/blocks/%s/children?page_size=100", url.PathEscape(parentID))
|
||||
if cursor != "" {
|
||||
@ -351,6 +352,7 @@ func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentI
|
||||
typeBody := block[typ]
|
||||
text := notiontext.Plain(typeBody)
|
||||
raw := notiontext.MarshalRaw(block)
|
||||
displayOrder++
|
||||
if err := st.UpsertBlock(ctx, store.Block{
|
||||
ID: block.string("id"),
|
||||
PageID: pageID,
|
||||
@ -360,6 +362,7 @@ func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentI
|
||||
Type: typ,
|
||||
Text: text,
|
||||
PropertiesJSON: marshalAny(typeBody),
|
||||
DisplayOrder: displayOrder,
|
||||
CreatedTime: parseTimeMS(block.string("created_time")),
|
||||
LastEditedTime: parseTimeMS(block.string("last_edited_time")),
|
||||
Alive: !block.bool("archived") && !block.bool("in_trash"),
|
||||
|
||||
@ -76,8 +76,8 @@ func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Pag
|
||||
|
||||
func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error) {
|
||||
rows, err := s.db.QueryContext(ctx, `select id, page_id, space_id, parent_id, parent_table, type, text, properties_json,
|
||||
content_json, format_json, created_time, last_edited_time, alive, source, raw_json, synced_at
|
||||
from blocks where page_id = ? and alive = 1 order by created_time, id`, pageID)
|
||||
content_json, format_json, display_order, created_time, last_edited_time, alive, source, raw_json, synced_at
|
||||
from blocks where page_id = ? and alive = 1 order by parent_id, display_order, created_time, id`, pageID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -87,7 +87,7 @@ func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error)
|
||||
var b Block
|
||||
var alive int
|
||||
if err := rows.Scan(&b.ID, &b.PageID, &b.SpaceID, &b.ParentID, &b.ParentTable, &b.Type, &b.Text, &b.PropertiesJSON,
|
||||
&b.ContentJSON, &b.FormatJSON, &b.CreatedTime, &b.LastEditedTime, &alive, &b.Source, &b.RawJSON, &b.SyncedAt); err != nil {
|
||||
&b.ContentJSON, &b.FormatJSON, &b.DisplayOrder, &b.CreatedTime, &b.LastEditedTime, &alive, &b.Source, &b.RawJSON, &b.SyncedAt); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
b.Alive = IntBool(alive)
|
||||
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -173,6 +174,7 @@ func (s *Store) init(ctx context.Context) error {
|
||||
properties_json text,
|
||||
content_json text,
|
||||
format_json text,
|
||||
display_order integer not null default 0,
|
||||
created_time integer,
|
||||
last_edited_time integer,
|
||||
alive integer not null,
|
||||
@ -181,7 +183,6 @@ func (s *Store) init(ctx context.Context) error {
|
||||
synced_at integer not null
|
||||
)`,
|
||||
`create index if not exists blocks_page_id on blocks(page_id)`,
|
||||
`create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`,
|
||||
`create index if not exists blocks_parent_id on blocks(parent_id)`,
|
||||
`create table if not exists collections (
|
||||
id text primary key,
|
||||
@ -249,12 +250,47 @@ func (s *Store) init(ctx context.Context) error {
|
||||
if current > schemaVersion {
|
||||
return fmt.Errorf("database schema version %d is newer than this notcrawl build supports (%d)", current, schemaVersion)
|
||||
}
|
||||
if err := s.ensureColumn(ctx, "blocks", "display_order", "integer not null default 0"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_order on blocks(page_id, alive, parent_id, display_order, created_time, id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := s.db.ExecContext(ctx, `insert or replace into meta(key, value) values('schema_version', ?)`, schemaVersion); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) ensureColumn(ctx context.Context, table, column, definition string) error {
|
||||
rows, err := s.db.QueryContext(ctx, `pragma table_info(`+table+`)`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var cid int
|
||||
var name, typ string
|
||||
var notNull int
|
||||
var defaultValue any
|
||||
var pk int
|
||||
if err := rows.Scan(&cid, &name, &typ, ¬Null, &defaultValue, &pk); err != nil {
|
||||
return err
|
||||
}
|
||||
if name == column {
|
||||
return rows.Err()
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = s.db.ExecContext(ctx, `alter table `+table+` add column `+column+` `+definition)
|
||||
return err
|
||||
}
|
||||
|
||||
// NowMS returns the current wall-clock time as milliseconds since the Unix
// epoch.
func NowMS() int64 {
	now := time.Now()
	return now.UnixMilli()
}
|
||||
@ -318,8 +354,8 @@ func (s *Store) UpsertPage(ctx context.Context, x Page) error {
|
||||
func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
|
||||
_, err := s.db.ExecContext(ctx, `insert into blocks(
|
||||
id, page_id, space_id, parent_id, parent_table, type, text, properties_json, content_json, format_json,
|
||||
created_time, last_edited_time, alive, source, raw_json, synced_at)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
display_order, created_time, last_edited_time, alive, source, raw_json, synced_at)
|
||||
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
on conflict(id) do update set
|
||||
page_id=excluded.page_id,
|
||||
space_id=excluded.space_id,
|
||||
@ -330,6 +366,7 @@ func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
|
||||
properties_json=excluded.properties_json,
|
||||
content_json=excluded.content_json,
|
||||
format_json=excluded.format_json,
|
||||
display_order=excluded.display_order,
|
||||
created_time=excluded.created_time,
|
||||
last_edited_time=excluded.last_edited_time,
|
||||
alive=excluded.alive,
|
||||
@ -337,7 +374,7 @@ func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
|
||||
raw_json=excluded.raw_json,
|
||||
synced_at=excluded.synced_at`,
|
||||
x.ID, x.PageID, x.SpaceID, x.ParentID, x.ParentTable, x.Type, x.Text, x.PropertiesJSON, x.ContentJSON, x.FormatJSON,
|
||||
x.CreatedTime, x.LastEditedTime, BoolInt(x.Alive), x.Source, x.RawJSON, x.SyncedAt)
|
||||
x.DisplayOrder, x.CreatedTime, x.LastEditedTime, BoolInt(x.Alive), x.Source, x.RawJSON, x.SyncedAt)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -401,24 +438,11 @@ func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
rows, err := s.db.QueryContext(ctx, `select text from blocks where page_id = ? and alive = 1 order by created_time, id`, pageID)
|
||||
blocks, err := s.PageBlocks(ctx, pageID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
var parts []string
|
||||
for rows.Next() {
|
||||
var text sql.NullString
|
||||
if err := rows.Scan(&text); err != nil {
|
||||
return err
|
||||
}
|
||||
if text.Valid && strings.TrimSpace(text.String) != "" {
|
||||
parts = append(parts, text.String)
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
parts := pageBlockTextParts(pageID, blocks)
|
||||
if _, err := s.db.ExecContext(ctx, `delete from page_fts where page_id = ?`, pageID); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -426,6 +450,55 @@ func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func pageBlockTextParts(pageID string, blocks []Block) []string {
|
||||
children := map[string][]Block{}
|
||||
for _, block := range blocks {
|
||||
if block.ID == pageID {
|
||||
continue
|
||||
}
|
||||
children[block.ParentID] = append(children[block.ParentID], block)
|
||||
}
|
||||
for parent := range children {
|
||||
sortBlockSiblings(children[parent])
|
||||
}
|
||||
|
||||
var parts []string
|
||||
var appendChildren func(string)
|
||||
appendChildren = func(parentID string) {
|
||||
for _, block := range children[parentID] {
|
||||
if strings.TrimSpace(block.Text) != "" {
|
||||
parts = append(parts, block.Text)
|
||||
}
|
||||
appendChildren(block.ID)
|
||||
}
|
||||
}
|
||||
appendChildren(pageID)
|
||||
if len(children[pageID]) == 0 {
|
||||
for _, block := range blocks {
|
||||
if block.ID == pageID || block.ParentID == pageID {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(block.Text) != "" {
|
||||
parts = append(parts, block.Text)
|
||||
}
|
||||
}
|
||||
}
|
||||
return parts
|
||||
}
|
||||
|
||||
func sortBlockSiblings(blocks []Block) {
|
||||
sort.SliceStable(blocks, func(i, j int) bool {
|
||||
a, z := blocks[i], blocks[j]
|
||||
if a.DisplayOrder != z.DisplayOrder {
|
||||
return a.DisplayOrder < z.DisplayOrder
|
||||
}
|
||||
if a.CreatedTime == z.CreatedTime {
|
||||
return a.ID < z.ID
|
||||
}
|
||||
return a.CreatedTime < z.CreatedTime
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult, error) {
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
|
||||
@ -33,6 +33,67 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreOrdersBlocksByDisplayOrder(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
ctx := context.Background()
|
||||
now := NowMS()
|
||||
if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
blocks := []Block{
|
||||
{ID: "third", PageID: "page1", ParentID: "page1", Type: "text", Text: "third", DisplayOrder: 3, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "first", PageID: "page1", ParentID: "page1", Type: "text", Text: "first", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "second", PageID: "page1", ParentID: "page1", Type: "text", Text: "second", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
}
|
||||
for _, block := range blocks {
|
||||
if err := st.UpsertBlock(ctx, block); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
got, err := st.PageBlocks(ctx, "page1")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(got) != 3 || got[0].ID != "first" || got[1].ID != "second" || got[2].ID != "third" {
|
||||
t.Fatalf("unexpected block order: %+v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreBuildsPageFTSInDisplayTreeOrder(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
ctx := context.Background()
|
||||
now := NowMS()
|
||||
if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Recipe", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
blocks := []Block{
|
||||
{ID: "z-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "third", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "a-child", PageID: "page1", ParentID: "a-root", Type: "text", Text: "second", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "a-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "first", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
}
|
||||
for _, block := range blocks {
|
||||
if err := st.UpsertBlock(ctx, block); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
var body string
|
||||
if err := st.DB().QueryRowContext(ctx, `select body from page_fts where page_id = ?`, "page1").Scan(&body); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if body != "first\nsecond\nthird" {
|
||||
t.Fatalf("unexpected FTS body order: %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreStatusAndOptimize(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "notcrawl.db")
|
||||
st, err := Open(path)
|
||||
|
||||
@ -47,6 +47,7 @@ type Block struct {
|
||||
PropertiesJSON string
|
||||
ContentJSON string
|
||||
FormatJSON string
|
||||
DisplayOrder int64
|
||||
CreatedTime int64
|
||||
LastEditedTime int64
|
||||
Alive bool
|
||||
|
||||
Loading…
Reference in New Issue
Block a user