fix(api): preserve block display order

Persist Notion API sibling order and use it for page blocks, Markdown export, and page FTS tree text.
This commit is contained in:
davelutztx 2026-04-27 12:54:04 -05:00 committed by GitHub
parent 47e96fd4c0
commit 25be299fab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 197 additions and 22 deletions

View File

@ -126,6 +126,9 @@ func renderBlocks(b *strings.Builder, pageID string, blocks []store.Block) {
for parent := range children {
sort.SliceStable(children[parent], func(i, j int) bool {
a, z := children[parent][i], children[parent][j]
if a.DisplayOrder != z.DisplayOrder {
return a.DisplayOrder < z.DisplayOrder
}
if a.CreatedTime == z.CreatedTime {
return a.ID < z.ID
}

View File

@ -44,3 +44,37 @@ func TestExporterWritesMarkdown(t *testing.T) {
t.Fatalf("unexpected markdown:\n%s", text)
}
}
// TestExporterUsesDisplayOrder verifies that Markdown export lists sibling
// blocks by DisplayOrder.
//
// Both blocks share a CreatedTime and their IDs sort in the opposite direction
// to their DisplayOrder, so the CreatedTime/ID tie-break on its own would put
// "salt" first and fail the test.
func TestExporterUsesDisplayOrder(t *testing.T) {
	ctx := context.Background()
	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	now := store.NowMS()
	if err := st.UpsertPage(ctx, store.Page{ID: "page1", Title: "Recipe", Alive: true, Source: "test", SyncedAt: now}); err != nil {
		t.Fatal(err)
	}
	for _, block := range []store.Block{
		{ID: "block-a", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "salt", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
		{ID: "block-b", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "flour", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
	} {
		if err := st.UpsertBlock(ctx, block); err != nil {
			t.Fatal(err)
		}
	}
	dir := t.TempDir()
	s, err := Exporter{Store: st, Dir: dir}.Export(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if len(s.Files) == 0 {
		t.Fatal("export produced no files")
	}
	b, err := os.ReadFile(s.Files[0])
	if err != nil {
		t.Fatal(err)
	}
	text := string(b)
	flour := strings.Index(text, "- flour")
	salt := strings.Index(text, "- salt")
	// strings.Index returns -1 for a missing substring; without this check a
	// missing list item would satisfy the ordering comparison below.
	if flour < 0 || salt < 0 {
		t.Fatalf("markdown is missing an expected list item:\n%s", text)
	}
	if flour > salt {
		t.Fatalf("markdown did not preserve display order:\n%s", text)
	}
}

View File

@ -332,6 +332,7 @@ func (c Client) usesDataSourceAPI() bool {
func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentID, spaceID string) (int, error) {
var count int
cursor := ""
var displayOrder int64
for {
path := fmt.Sprintf("/blocks/%s/children?page_size=100", url.PathEscape(parentID))
if cursor != "" {
@ -351,6 +352,7 @@ func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentI
typeBody := block[typ]
text := notiontext.Plain(typeBody)
raw := notiontext.MarshalRaw(block)
displayOrder++
if err := st.UpsertBlock(ctx, store.Block{
ID: block.string("id"),
PageID: pageID,
@ -360,6 +362,7 @@ func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentI
Type: typ,
Text: text,
PropertiesJSON: marshalAny(typeBody),
DisplayOrder: displayOrder,
CreatedTime: parseTimeMS(block.string("created_time")),
LastEditedTime: parseTimeMS(block.string("last_edited_time")),
Alive: !block.bool("archived") && !block.bool("in_trash"),

View File

@ -76,8 +76,8 @@ func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Pag
func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error) {
rows, err := s.db.QueryContext(ctx, `select id, page_id, space_id, parent_id, parent_table, type, text, properties_json,
content_json, format_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from blocks where page_id = ? and alive = 1 order by created_time, id`, pageID)
content_json, format_json, display_order, created_time, last_edited_time, alive, source, raw_json, synced_at
from blocks where page_id = ? and alive = 1 order by parent_id, display_order, created_time, id`, pageID)
if err != nil {
return nil, err
}
@ -87,7 +87,7 @@ func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error)
var b Block
var alive int
if err := rows.Scan(&b.ID, &b.PageID, &b.SpaceID, &b.ParentID, &b.ParentTable, &b.Type, &b.Text, &b.PropertiesJSON,
&b.ContentJSON, &b.FormatJSON, &b.CreatedTime, &b.LastEditedTime, &alive, &b.Source, &b.RawJSON, &b.SyncedAt); err != nil {
&b.ContentJSON, &b.FormatJSON, &b.DisplayOrder, &b.CreatedTime, &b.LastEditedTime, &alive, &b.Source, &b.RawJSON, &b.SyncedAt); err != nil {
return nil, err
}
b.Alive = IntBool(alive)

View File

@ -7,6 +7,7 @@ import (
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
@ -173,6 +174,7 @@ func (s *Store) init(ctx context.Context) error {
properties_json text,
content_json text,
format_json text,
display_order integer not null default 0,
created_time integer,
last_edited_time integer,
alive integer not null,
@ -181,7 +183,6 @@ func (s *Store) init(ctx context.Context) error {
synced_at integer not null
)`,
`create index if not exists blocks_page_id on blocks(page_id)`,
`create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`,
`create index if not exists blocks_parent_id on blocks(parent_id)`,
`create table if not exists collections (
id text primary key,
@ -249,12 +250,47 @@ func (s *Store) init(ctx context.Context) error {
if current > schemaVersion {
return fmt.Errorf("database schema version %d is newer than this notcrawl build supports (%d)", current, schemaVersion)
}
if err := s.ensureColumn(ctx, "blocks", "display_order", "integer not null default 0"); err != nil {
return err
}
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_order on blocks(page_id, alive, parent_id, display_order, created_time, id)`); err != nil {
return err
}
if _, err := s.db.ExecContext(ctx, `create index if not exists blocks_page_alive_created on blocks(page_id, alive, created_time, id)`); err != nil {
return err
}
if _, err := s.db.ExecContext(ctx, `insert or replace into meta(key, value) values('schema_version', ?)`, schemaVersion); err != nil {
return err
}
return nil
}
// ensureColumn adds column to table with the given definition unless
// `pragma table_info` already reports a column with that name.
//
// table, column, and definition are concatenated directly into the SQL text,
// so callers must only pass trusted values.
func (s *Store) ensureColumn(ctx context.Context, table, column, definition string) error {
	infoRows, err := s.db.QueryContext(ctx, `pragma table_info(`+table+`)`)
	if err != nil {
		return err
	}
	defer infoRows.Close()

	exists := false
	for !exists && infoRows.Next() {
		// Only name is inspected; the remaining fields exist to satisfy Scan.
		var (
			cid, notNull, pk int
			name, typ        string
			defaultValue     any
		)
		if scanErr := infoRows.Scan(&cid, &name, &typ, &notNull, &defaultValue, &pk); scanErr != nil {
			return scanErr
		}
		exists = name == column
	}
	// Return early on an iteration error, or when the column is already there.
	if err := infoRows.Err(); err != nil || exists {
		return err
	}

	_, err = s.db.ExecContext(ctx, `alter table `+table+` add column `+column+` `+definition)
	return err
}
// NowMS reports the current wall-clock time as milliseconds since the Unix
// epoch.
func NowMS() int64 {
	now := time.Now()
	return now.UnixMilli()
}
@ -318,8 +354,8 @@ func (s *Store) UpsertPage(ctx context.Context, x Page) error {
func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
_, err := s.db.ExecContext(ctx, `insert into blocks(
id, page_id, space_id, parent_id, parent_table, type, text, properties_json, content_json, format_json,
created_time, last_edited_time, alive, source, raw_json, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
display_order, created_time, last_edited_time, alive, source, raw_json, synced_at)
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict(id) do update set
page_id=excluded.page_id,
space_id=excluded.space_id,
@ -330,6 +366,7 @@ func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
properties_json=excluded.properties_json,
content_json=excluded.content_json,
format_json=excluded.format_json,
display_order=excluded.display_order,
created_time=excluded.created_time,
last_edited_time=excluded.last_edited_time,
alive=excluded.alive,
@ -337,7 +374,7 @@ func (s *Store) UpsertBlock(ctx context.Context, x Block) error {
raw_json=excluded.raw_json,
synced_at=excluded.synced_at`,
x.ID, x.PageID, x.SpaceID, x.ParentID, x.ParentTable, x.Type, x.Text, x.PropertiesJSON, x.ContentJSON, x.FormatJSON,
x.CreatedTime, x.LastEditedTime, BoolInt(x.Alive), x.Source, x.RawJSON, x.SyncedAt)
x.DisplayOrder, x.CreatedTime, x.LastEditedTime, BoolInt(x.Alive), x.Source, x.RawJSON, x.SyncedAt)
if err != nil {
return err
}
@ -401,24 +438,11 @@ func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
}
return err
}
rows, err := s.db.QueryContext(ctx, `select text from blocks where page_id = ? and alive = 1 order by created_time, id`, pageID)
blocks, err := s.PageBlocks(ctx, pageID)
if err != nil {
return err
}
defer rows.Close()
var parts []string
for rows.Next() {
var text sql.NullString
if err := rows.Scan(&text); err != nil {
return err
}
if text.Valid && strings.TrimSpace(text.String) != "" {
parts = append(parts, text.String)
}
}
if err := rows.Err(); err != nil {
return err
}
parts := pageBlockTextParts(pageID, blocks)
if _, err := s.db.ExecContext(ctx, `delete from page_fts where page_id = ?`, pageID); err != nil {
return err
}
@ -426,6 +450,55 @@ func (s *Store) refreshPageFTS(ctx context.Context, pageID string) error {
return err
}
// pageBlockTextParts returns the non-blank text of a page's blocks in display
// order: a depth-first walk of the block tree rooted at pageID, with each
// sibling group ordered by sortBlockSiblings.
//
// A block whose ID equals pageID is skipped. Blocks the walk cannot reach from
// pageID (for example because their parent is absent from blocks) are appended
// after the tree text in their input order, so their text is kept rather than
// silently dropped. When no block has pageID as its parent this yields every
// block in input order.
func pageBlockTextParts(pageID string, blocks []Block) []string {
	children := make(map[string][]Block, len(blocks))
	for _, block := range blocks {
		if block.ID == pageID {
			continue
		}
		children[block.ParentID] = append(children[block.ParentID], block)
	}
	for parent := range children {
		sortBlockSiblings(children[parent])
	}

	var parts []string
	visited := make(map[string]struct{}, len(blocks))
	var appendChildren func(string)
	appendChildren = func(parentID string) {
		for _, block := range children[parentID] {
			// Guard against re-entering a block (e.g. duplicate IDs in the
			// input), which could otherwise recurse without end.
			if _, seen := visited[block.ID]; seen {
				continue
			}
			visited[block.ID] = struct{}{}
			if strings.TrimSpace(block.Text) != "" {
				parts = append(parts, block.Text)
			}
			appendChildren(block.ID)
		}
	}
	appendChildren(pageID)

	// Keep the text of blocks that are not connected to the page root.
	for _, block := range blocks {
		if block.ID == pageID {
			continue
		}
		if _, seen := visited[block.ID]; seen {
			continue
		}
		if strings.TrimSpace(block.Text) != "" {
			parts = append(parts, block.Text)
		}
	}
	return parts
}
// sortBlockSiblings stably sorts blocks in place by DisplayOrder, breaking
// ties by CreatedTime and then by ID.
func sortBlockSiblings(blocks []Block) {
	sort.SliceStable(blocks, func(i, j int) bool {
		left, right := blocks[i], blocks[j]
		switch {
		case left.DisplayOrder != right.DisplayOrder:
			return left.DisplayOrder < right.DisplayOrder
		case left.CreatedTime != right.CreatedTime:
			return left.CreatedTime < right.CreatedTime
		default:
			return left.ID < right.ID
		}
	})
}
func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult, error) {
if limit <= 0 {
limit = 20

View File

@ -33,6 +33,67 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) {
}
}
// TestStoreOrdersBlocksByDisplayOrder verifies that PageBlocks returns sibling
// blocks ordered by DisplayOrder.
//
// All blocks share a parent and a CreatedTime, and their IDs sort in the
// reverse of their DisplayOrder, so ordering by created_time and id alone
// would return them in the wrong order and fail the test.
func TestStoreOrdersBlocksByDisplayOrder(t *testing.T) {
	st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	ctx := context.Background()
	now := NowMS()
	if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}); err != nil {
		t.Fatal(err)
	}
	blocks := []Block{
		{ID: "a-third", PageID: "page1", ParentID: "page1", Type: "text", Text: "third", DisplayOrder: 3, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
		{ID: "c-first", PageID: "page1", ParentID: "page1", Type: "text", Text: "first", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
		{ID: "b-second", PageID: "page1", ParentID: "page1", Type: "text", Text: "second", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
	}
	for _, block := range blocks {
		if err := st.UpsertBlock(ctx, block); err != nil {
			t.Fatal(err)
		}
	}
	got, err := st.PageBlocks(ctx, "page1")
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != 3 || got[0].ID != "c-first" || got[1].ID != "b-second" || got[2].ID != "a-third" {
		t.Fatalf("unexpected block order: %+v", got)
	}
}
// TestStoreBuildsPageFTSInDisplayTreeOrder checks that the page_fts body for a
// page lists block text with each parent followed by its children, then the
// parent's next sibling. The child block is upserted before its parent, and
// the second root block is upserted first.
func TestStoreBuildsPageFTSInDisplayTreeOrder(t *testing.T) {
	ctx := context.Background()
	db, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer db.Close()

	ts := NowMS()
	page := Page{ID: "page1", Title: "Recipe", Alive: true, Source: "test", SyncedAt: ts}
	if err := db.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}

	for _, blk := range []Block{
		{ID: "z-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "third", DisplayOrder: 2, CreatedTime: ts, Alive: true, Source: "test", SyncedAt: ts},
		{ID: "a-child", PageID: "page1", ParentID: "a-root", Type: "text", Text: "second", DisplayOrder: 1, CreatedTime: ts, Alive: true, Source: "test", SyncedAt: ts},
		{ID: "a-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "first", DisplayOrder: 1, CreatedTime: ts, Alive: true, Source: "test", SyncedAt: ts},
	} {
		if err := db.UpsertBlock(ctx, blk); err != nil {
			t.Fatal(err)
		}
	}

	const want = "first\nsecond\nthird"
	var got string
	row := db.DB().QueryRowContext(ctx, `select body from page_fts where page_id = ?`, "page1")
	if err := row.Scan(&got); err != nil {
		t.Fatal(err)
	}
	if got != want {
		t.Fatalf("unexpected FTS body order: %q", got)
	}
}
func TestStoreStatusAndOptimize(t *testing.T) {
path := filepath.Join(t.TempDir(), "notcrawl.db")
st, err := Open(path)

View File

@ -47,6 +47,7 @@ type Block struct {
PropertiesJSON string
ContentJSON string
FormatJSON string
DisplayOrder int64
CreatedTime int64
LastEditedTime int64
Alive bool