Compare commits

...

1 Commit

Author SHA1 Message Date
Vincent Koc
cf070b80d2
fix(desktop): infer missing metadata
Some checks failed
Validation / validate (push) Has been cancelled
2026-04-27 12:44:48 -07:00
5 changed files with 213 additions and 9 deletions

View File

@ -85,6 +85,11 @@ func Ingest(ctx context.Context, st *store.Store, path, cacheDir string) (Summar
if s.Comments, err = ingestComments(ctx, st, db); err != nil {
return err
}
addedSpaces, err := st.EnsureSpaceFallbacks(ctx, SourceName)
if err != nil {
return err
}
s.Spaces += addedSpaces
return nil
})
}); err != nil {
@ -316,6 +321,7 @@ type localBlock struct {
Alive bool
FormatJSON string
RawJSON string
Text string
}
func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int, blocks int, rawRecords int, err error) {
@ -340,6 +346,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return pages, blocks, rawRecords, err
}
b.Alive = alive != 0
b.Text = blockText(b.PropertiesJSON)
byID[b.ID] = b
all = append(all, b)
}
@ -366,11 +373,8 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return ""
}
pageFor = func(id string) string { return resolve(id, map[string]bool{}) }
children := childBlocksByParent(all)
for _, b := range all {
title := notiontext.TitleFromProperties(b.PropertiesJSON)
if title == "" && isPageType(b.Type) {
title = "Untitled"
}
if isPageType(b.Type) {
if err := st.UpsertPage(ctx, store.Page{
ID: b.ID,
@ -378,7 +382,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
ParentID: b.ParentID,
ParentTable: b.ParentTable,
CollectionID: b.CollectionID,
Title: title,
Title: pageTitle(b, children),
PropertiesJSON: b.PropertiesJSON,
CreatedTime: b.CreatedTime,
LastEditedTime: b.LastEditedTime,
@ -392,7 +396,6 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
pages++
}
pageID := pageFor(b.ID)
text := notiontext.PlainFromJSON(b.PropertiesJSON)
if err := st.UpsertBlock(ctx, store.Block{
ID: b.ID,
PageID: pageID,
@ -400,7 +403,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
ParentID: b.ParentID,
ParentTable: b.ParentTable,
Type: b.Type,
Text: text,
Text: b.Text,
PropertiesJSON: b.PropertiesJSON,
ContentJSON: b.ContentJSON,
FormatJSON: b.FormatJSON,
@ -425,6 +428,73 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
return pages, blocks, rawRecords, nil
}
// childBlocksByParent groups the blocks that are alive and have a non-empty
// ParentID under that parent's ID. Within each group the blocks are ordered by
// CreatedTime ascending, with equal timestamps ordered by ID ascending.
func childBlocksByParent(blocks []localBlock) map[string][]localBlock {
	grouped := make(map[string][]localBlock)
	for i := range blocks {
		blk := blocks[i]
		if blk.Alive && blk.ParentID != "" {
			grouped[blk.ParentID] = append(grouped[blk.ParentID], blk)
		}
	}
	// Each map value shares its backing array with the ranged copy, so sorting
	// the copy in place orders the slice stored in the map.
	for _, siblings := range grouped {
		sort.SliceStable(siblings, func(i, j int) bool {
			left, right := siblings[i], siblings[j]
			if left.CreatedTime != right.CreatedTime {
				return left.CreatedTime < right.CreatedTime
			}
			return left.ID < right.ID
		})
	}
	return grouped
}
// pageTitle chooses the title to store for a page block. It prefers the title
// found in the page's own properties, then a title inferred from the blocks
// below it via fallbackPageTitle, and otherwise returns "Untitled".
func pageTitle(page localBlock, children map[string][]localBlock) string {
	own := notiontext.TitleFromProperties(page.PropertiesJSON)
	if own != "" {
		return own
	}
	// A fresh seen set is used for every page so that lookups are independent.
	inferred := fallbackPageTitle(page.ID, children, map[string]bool{})
	if inferred != "" {
		return inferred
	}
	return "Untitled"
}
// fallbackPageTitle searches the blocks below parentID depth-first, in the
// order they appear in children, and returns the first non-empty titleSnippet
// taken from a non-page block's Text. Page-type children do not contribute
// their own text, but the blocks below them are still searched. seen records
// the IDs already expanded so that no parent is visited twice. It returns ""
// when parentID is empty, already seen, or no text is found.
func fallbackPageTitle(parentID string, children map[string][]localBlock, seen map[string]bool) string {
	if parentID == "" {
		return ""
	}
	if seen[parentID] {
		return ""
	}
	seen[parentID] = true

	for _, kid := range children[parentID] {
		var candidate string
		if !isPageType(kid.Type) {
			candidate = titleSnippet(kid.Text)
		}
		// Descend only when the child itself offered nothing usable.
		if candidate == "" {
			candidate = fallbackPageTitle(kid.ID, children, seen)
		}
		if candidate != "" {
			return candidate
		}
	}
	return ""
}
// titleSnippet turns block text into a candidate page title. The text is
// passed through notiontext.Normalize; an empty result yields "". Text of at
// most 96 runes is returned as normalized. Longer text is cut to its first 96
// runes and any whitespace left dangling at the cut is dropped, so the result
// never ends in a stray space.
func titleSnippet(s string) string {
	// maxRunes caps the snippet length. It is counted in runes, not bytes, so
	// a multi-byte character is never split.
	const maxRunes = 96

	s = notiontext.Normalize(s)
	if s == "" {
		return ""
	}
	runes := []rune(s)
	if len(runes) <= maxRunes {
		return s
	}
	// A fixed-length cut can land immediately after a word separator; trim
	// that trailing whitespace so the title does not end with it.
	end := maxRunes
	for end > 0 && (runes[end-1] == ' ' || runes[end-1] == '\t' || runes[end-1] == '\n' || runes[end-1] == '\r') {
		end--
	}
	return string(runes[:end])
}
// blockText extracts the text stored for a block from its raw properties
// JSON. It returns notiontext.TitleFromProperties(raw) when that is non-empty
// and otherwise falls back to notiontext.PlainFromJSON(raw).
func blockText(raw string) string {
	text := notiontext.TitleFromProperties(raw)
	if text == "" {
		text = notiontext.PlainFromJSON(raw)
	}
	return text
}
func ingestComments(ctx context.Context, st *store.Store, db *sql.DB) (int, error) {
rows, err := db.QueryContext(ctx, `select id, parent_id, space_id, coalesce(text, ''), coalesce(created_by_id, ''),
coalesce(cast(created_time as integer), 0), coalesce(cast(last_edited_time as integer), 0), alive,

View File

@ -1,10 +1,15 @@
package notiondesktop
import (
"context"
"database/sql"
"os"
"path/filepath"
"testing"
"time"
"github.com/vincentkoc/notcrawl/internal/store"
_ "modernc.org/sqlite"
)
func TestPruneDesktopSnapshotsKeepsNewestAndSidecars(t *testing.T) {
@ -55,3 +60,51 @@ func TestPruneDesktopSnapshotsKeepsNewestAndSidecars(t *testing.T) {
}
}
}
// TestIngestBlocksDerivesUntitledPageFromChildText builds a source database
// holding one page with empty properties and one text child titled
// "Decision log", runs ingestBlocks, and checks that the stored page title is
// taken from the child's text.
func TestIngestBlocksDerivesUntitledPageFromChildText(t *testing.T) {
	const schema = `create table block (
id text primary key,
space_id text,
type text,
properties text,
content text,
collection_id text,
created_time integer,
last_edited_time integer,
parent_id text,
parent_table text,
alive integer,
format text
)`
	const fixture = `insert into block(id, space_id, type, properties, content, collection_id, created_time, last_edited_time, parent_id, parent_table, alive, format)
values
('page1', 'space1', 'page', '{}', '', '', 1, 1, '', '', 1, ''),
('child1', 'space1', 'text', '{"title":[["Decision log"]]}', '', '', 2, 2, 'page1', 'block', 1, '')`
	const want = "Decision log"

	ctx := context.Background()

	// Source database standing in for the desktop app's local store.
	desktop, err := sql.Open("sqlite", filepath.Join(t.TempDir(), "desktop.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer desktop.Close()
	for _, stmt := range []string{schema, fixture} {
		if _, err := desktop.ExecContext(ctx, stmt); err != nil {
			t.Fatal(err)
		}
	}

	// Destination store that ingestBlocks writes into.
	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	_, _, _, err = ingestBlocks(ctx, st, desktop)
	if err != nil {
		t.Fatal(err)
	}

	var got string
	row := st.DB().QueryRowContext(ctx, `select title from pages where id = 'page1'`)
	if err := row.Scan(&got); err != nil {
		t.Fatal(err)
	}
	if got != want {
		t.Fatalf("expected child text title, got %q", got)
	}
}

View File

@ -197,14 +197,14 @@ func (s *Store) SpaceName(ctx context.Context, id string) (string, error) {
err := s.queryRowContext(ctx, `select name from spaces where id = ?`, id).Scan(&name)
if err != nil {
if err == sql.ErrNoRows {
return "space-" + shortID(id), nil
return fallbackSpaceName(id), nil
}
return "", err
}
if name.Valid && name.String != "" {
return name.String, nil
}
return "space-" + shortID(id), nil
return fallbackSpaceName(id), nil
}
func (s *Store) TeamName(ctx context.Context, id string) (string, error) {
@ -277,3 +277,7 @@ func shortID(id string) string {
}
return clean
}
// fallbackSpaceName builds the placeholder display name used for a space that
// has no stored name: the text "External Space " followed by shortID(id).
func fallbackSpaceName(id string) string {
	const prefix = "External Space "
	return prefix + shortID(id)
}

View File

@ -372,6 +372,47 @@ func (s *Store) UpsertSpace(ctx context.Context, x Space) error {
return err
}
// EnsureSpaceFallbacks creates placeholder rows for spaces that are referenced
// but missing. It collects every distinct non-empty space_id found in pages,
// blocks, teams, collections, comments and raw_records that has no matching
// id in spaces, then upserts a Space for each one with the name from
// fallbackSpaceName, a RawJSON payload marking it as inferred, the given
// source, and the current NowMS timestamp.
//
// It returns the number of placeholder spaces upserted. On any query, scan or
// upsert error it returns 0 and that error.
func (s *Store) EnsureSpaceFallbacks(ctx context.Context, source string) (int, error) {
	const missingSpaces = `select distinct space_id from (
select space_id from pages
union all select space_id from blocks
union all select space_id from teams
union all select space_id from collections
union all select space_id from comments
union all select space_id from raw_records
)
where coalesce(space_id, '') <> ''
and space_id not in (select id from spaces)`

	rows, err := s.queryContext(ctx, missingSpaces)
	if err != nil {
		return 0, err
	}
	defer rows.Close()

	// Read every ID before writing anything, so the upserts below start only
	// after the result set has been fully consumed.
	var missing []string
	for rows.Next() {
		var spaceID string
		if scanErr := rows.Scan(&spaceID); scanErr != nil {
			return 0, scanErr
		}
		missing = append(missing, spaceID)
	}
	if iterErr := rows.Err(); iterErr != nil {
		return 0, iterErr
	}

	// One timestamp is shared by all placeholders created in this call.
	syncedAt := NowMS()
	for _, spaceID := range missing {
		placeholder := Space{
			ID:       spaceID,
			Name:     fallbackSpaceName(spaceID),
			RawJSON:  fmt.Sprintf(`{"id":%q,"inferred":true}`, spaceID),
			Source:   source,
			SyncedAt: syncedAt,
		}
		if upsertErr := s.UpsertSpace(ctx, placeholder); upsertErr != nil {
			return 0, upsertErr
		}
	}
	return len(missing), nil
}
func (s *Store) UpsertUser(ctx context.Context, x User) error {
_, err := s.execContext(ctx, `insert into users(id, name, email, raw_json, source, synced_at)
values (?, ?, ?, ?, ?, ?)

View File

@ -109,6 +109,42 @@ func TestStoreTransactionCommitsAndRollsBack(t *testing.T) {
}
}
// TestStoreEnsuresFallbackSpaces stores a page that references a space with no
// row in spaces, then checks that EnsureSpaceFallbacks adds exactly one
// placeholder, that SpaceName reports the fallback name for it, and that a
// second call adds nothing.
func TestStoreEnsuresFallbackSpaces(t *testing.T) {
	const spaceID = "52f1c029-1111-2222-3333-ea9259e0"
	const wantName = "External Space 52f1c029-ea9259e0"

	st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	ctx := context.Background()

	orphan := Page{
		ID:       "page1",
		SpaceID:  spaceID,
		Title:    "Loose",
		Alive:    true,
		Source:   "test",
		SyncedAt: NowMS(),
	}
	if err := st.UpsertPage(ctx, orphan); err != nil {
		t.Fatal(err)
	}

	// ensure runs EnsureSpaceFallbacks and fails the test on error.
	ensure := func() int {
		t.Helper()
		n, err := st.EnsureSpaceFallbacks(ctx, "test")
		if err != nil {
			t.Fatal(err)
		}
		return n
	}

	if first := ensure(); first != 1 {
		t.Fatalf("expected one fallback space, got %d", first)
	}

	gotName, err := st.SpaceName(ctx, spaceID)
	if err != nil {
		t.Fatal(err)
	}
	if gotName != wantName {
		t.Fatalf("unexpected fallback space name: %q", gotName)
	}

	// The placeholder now exists, so a repeat run must find nothing to add.
	if second := ensure(); second != 0 {
		t.Fatalf("expected fallback insertion to be idempotent, got %d", second)
	}
}
func TestStoreOrdersBlocksByDisplayOrder(t *testing.T) {
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {