Compare commits
6 Commits
perf/prelo
...
ci/update-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e3dba2b005 | ||
|
|
c5a5280bb0 | ||
|
|
d17c48b42f | ||
|
|
e96d54bccc | ||
|
|
74d12a6ec6 | ||
|
|
7c00851638 |
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@ -50,7 +50,7 @@ jobs:
|
||||
echo "RELEASE_VERSION=${TAG#v}" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Build release artifacts
|
||||
uses: goreleaser/goreleaser-action@v6
|
||||
uses: goreleaser/goreleaser-action@v7
|
||||
with:
|
||||
distribution: goreleaser
|
||||
version: "~> v2"
|
||||
|
||||
@ -327,11 +327,15 @@ func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args []
|
||||
return err
|
||||
}
|
||||
for _, r := range results {
|
||||
fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", r.Kind, r.ID, r.Title, r.Text)
|
||||
fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", searchField(r.Kind), searchField(r.ID), searchField(r.Title), searchField(r.Text))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// searchField flattens s into a single-line value: each run of whitespace
// (spaces, tabs, newlines, ...) is replaced by one space and leading and
// trailing whitespace is dropped. runSearch applies it to every column so a
// value cannot inject extra tabs or newlines into the tab-separated output.
func searchField(s string) string {
	words := strings.Fields(s)
	if len(words) == 0 {
		return ""
	}
	var b strings.Builder
	b.WriteString(words[0])
	for _, word := range words[1:] {
		b.WriteByte(' ')
		b.WriteString(word)
	}
	return b.String()
}
|
||||
|
||||
func runSQL(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
|
||||
if len(args) == 0 {
|
||||
return fmt.Errorf("sql query required")
|
||||
|
||||
10
cmd/notcrawl/main_test.go
Normal file
10
cmd/notcrawl/main_test.go
Normal file
@ -0,0 +1,10 @@
|
||||
package main
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
|
||||
got := searchField("line one\nline\ttwo line three")
|
||||
if got != "line one line two line three" {
|
||||
t.Fatalf("unexpected field: %q", got)
|
||||
}
|
||||
}
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@ -16,6 +17,7 @@ import (
|
||||
)
|
||||
|
||||
const SourceName = "desktop"
|
||||
const desktopSnapshotRetention = 2
|
||||
|
||||
type Source struct {
|
||||
Path string
|
||||
@ -83,6 +85,11 @@ func Ingest(ctx context.Context, st *store.Store, path, cacheDir string) (Summar
|
||||
if s.Comments, err = ingestComments(ctx, st, db); err != nil {
|
||||
return err
|
||||
}
|
||||
addedSpaces, err := st.EnsureSpaceFallbacks(ctx, SourceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
s.Spaces += addedSpaces
|
||||
return nil
|
||||
})
|
||||
}); err != nil {
|
||||
@ -122,9 +129,65 @@ func snapshotDB(path, cacheDir string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
if err := pruneDesktopSnapshots(cacheDir, desktopSnapshotRetention, outPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return outPath, nil
|
||||
}
|
||||
|
||||
// desktopSnapshot describes one snapshot database file found in the cache
// directory.
type desktopSnapshot struct {
	path    string    // cacheDir joined with the entry name
	modTime time.Time // modification time reported for the entry
}

// pruneDesktopSnapshots deletes old snapshot databases from cacheDir.
//
// A snapshot is any non-directory entry named "notion-desktop-*.db". The
// file at current (when non-empty) is always retained and counts toward
// keep; the remaining slots are filled with the most recently modified
// snapshots, ties broken by descending path. keep values below 1 are treated
// as 1. Every snapshot that is not retained is removed together with its
// "-wal" and "-shm" sidecar files.
//
// Entries that disappear while the directory is being examined are skipped
// rather than reported, so a concurrent cleanup cannot fail the caller.
func pruneDesktopSnapshots(cacheDir string, keep int, current string) error {
	if keep < 1 {
		keep = 1
	}
	entries, err := os.ReadDir(cacheDir)
	if err != nil {
		return err
	}
	var snapshots []desktopSnapshot
	for _, entry := range entries {
		name := entry.Name()
		if entry.IsDir() || !strings.HasPrefix(name, "notion-desktop-") || !strings.HasSuffix(name, ".db") {
			continue
		}
		info, err := entry.Info()
		if err != nil {
			// Info stats lazily: the file may have been removed since
			// ReadDir listed it, in which case there is nothing to prune.
			if os.IsNotExist(err) {
				continue
			}
			return err
		}
		snapshots = append(snapshots, desktopSnapshot{
			path:    filepath.Clean(filepath.Join(cacheDir, name)),
			modTime: info.ModTime(),
		})
	}
	// Newest first; equal timestamps fall back to descending path so the
	// outcome is deterministic.
	sort.SliceStable(snapshots, func(i, j int) bool {
		if snapshots[i].modTime.Equal(snapshots[j].modTime) {
			return snapshots[i].path > snapshots[j].path
		}
		return snapshots[i].modTime.After(snapshots[j].modTime)
	})
	keepPaths := map[string]bool{}
	if current != "" {
		keepPaths[filepath.Clean(current)] = true
	}
	for i := 0; i < len(snapshots) && len(keepPaths) < keep; i++ {
		keepPaths[snapshots[i].path] = true
	}
	for _, snapshot := range snapshots {
		if keepPaths[snapshot.path] {
			continue
		}
		// Remove the database and its sidecars; missing files are fine.
		for _, target := range []string{snapshot.path, snapshot.path + "-wal", snapshot.path + "-shm"} {
			if err := os.Remove(target); err != nil && !os.IsNotExist(err) {
				return err
			}
		}
	}
	return nil
}
|
||||
|
||||
func copyFile(src, dst string, perm os.FileMode) error {
|
||||
in, err := os.Open(src)
|
||||
if err != nil {
|
||||
@ -258,6 +321,7 @@ type localBlock struct {
|
||||
Alive bool
|
||||
FormatJSON string
|
||||
RawJSON string
|
||||
Text string
|
||||
}
|
||||
|
||||
func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int, blocks int, rawRecords int, err error) {
|
||||
@ -282,6 +346,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
return pages, blocks, rawRecords, err
|
||||
}
|
||||
b.Alive = alive != 0
|
||||
b.Text = blockText(b.PropertiesJSON)
|
||||
byID[b.ID] = b
|
||||
all = append(all, b)
|
||||
}
|
||||
@ -308,11 +373,8 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
return ""
|
||||
}
|
||||
pageFor = func(id string) string { return resolve(id, map[string]bool{}) }
|
||||
children := childBlocksByParent(all)
|
||||
for _, b := range all {
|
||||
title := notiontext.TitleFromProperties(b.PropertiesJSON)
|
||||
if title == "" && isPageType(b.Type) {
|
||||
title = "Untitled"
|
||||
}
|
||||
if isPageType(b.Type) {
|
||||
if err := st.UpsertPage(ctx, store.Page{
|
||||
ID: b.ID,
|
||||
@ -320,7 +382,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
ParentID: b.ParentID,
|
||||
ParentTable: b.ParentTable,
|
||||
CollectionID: b.CollectionID,
|
||||
Title: title,
|
||||
Title: pageTitle(b, children),
|
||||
PropertiesJSON: b.PropertiesJSON,
|
||||
CreatedTime: b.CreatedTime,
|
||||
LastEditedTime: b.LastEditedTime,
|
||||
@ -334,7 +396,6 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
pages++
|
||||
}
|
||||
pageID := pageFor(b.ID)
|
||||
text := notiontext.PlainFromJSON(b.PropertiesJSON)
|
||||
if err := st.UpsertBlock(ctx, store.Block{
|
||||
ID: b.ID,
|
||||
PageID: pageID,
|
||||
@ -342,7 +403,7 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
ParentID: b.ParentID,
|
||||
ParentTable: b.ParentTable,
|
||||
Type: b.Type,
|
||||
Text: text,
|
||||
Text: b.Text,
|
||||
PropertiesJSON: b.PropertiesJSON,
|
||||
ContentJSON: b.ContentJSON,
|
||||
FormatJSON: b.FormatJSON,
|
||||
@ -367,6 +428,73 @@ func ingestBlocks(ctx context.Context, st *store.Store, db *sql.DB) (pages int,
|
||||
return pages, blocks, rawRecords, nil
|
||||
}
|
||||
|
||||
func childBlocksByParent(blocks []localBlock) map[string][]localBlock {
|
||||
children := map[string][]localBlock{}
|
||||
for _, block := range blocks {
|
||||
if !block.Alive || block.ParentID == "" {
|
||||
continue
|
||||
}
|
||||
children[block.ParentID] = append(children[block.ParentID], block)
|
||||
}
|
||||
for parent := range children {
|
||||
sort.SliceStable(children[parent], func(i, j int) bool {
|
||||
a, z := children[parent][i], children[parent][j]
|
||||
if a.CreatedTime == z.CreatedTime {
|
||||
return a.ID < z.ID
|
||||
}
|
||||
return a.CreatedTime < z.CreatedTime
|
||||
})
|
||||
}
|
||||
return children
|
||||
}
|
||||
|
||||
func pageTitle(page localBlock, children map[string][]localBlock) string {
|
||||
if title := notiontext.TitleFromProperties(page.PropertiesJSON); title != "" {
|
||||
return title
|
||||
}
|
||||
if title := fallbackPageTitle(page.ID, children, map[string]bool{}); title != "" {
|
||||
return title
|
||||
}
|
||||
return "Untitled"
|
||||
}
|
||||
|
||||
func fallbackPageTitle(parentID string, children map[string][]localBlock, seen map[string]bool) string {
|
||||
if parentID == "" || seen[parentID] {
|
||||
return ""
|
||||
}
|
||||
seen[parentID] = true
|
||||
for _, child := range children[parentID] {
|
||||
if !isPageType(child.Type) {
|
||||
if title := titleSnippet(child.Text); title != "" {
|
||||
return title
|
||||
}
|
||||
}
|
||||
if title := fallbackPageTitle(child.ID, children, seen); title != "" {
|
||||
return title
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func titleSnippet(s string) string {
|
||||
s = notiontext.Normalize(s)
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
runes := []rune(s)
|
||||
if len(runes) > 96 {
|
||||
return string(runes[:96])
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func blockText(raw string) string {
|
||||
if title := notiontext.TitleFromProperties(raw); title != "" {
|
||||
return title
|
||||
}
|
||||
return notiontext.PlainFromJSON(raw)
|
||||
}
|
||||
|
||||
func ingestComments(ctx context.Context, st *store.Store, db *sql.DB) (int, error) {
|
||||
rows, err := db.QueryContext(ctx, `select id, parent_id, space_id, coalesce(text, ''), coalesce(created_by_id, ''),
|
||||
coalesce(cast(created_time as integer), 0), coalesce(cast(last_edited_time as integer), 0), alive,
|
||||
|
||||
110
internal/notiondesktop/desktop_test.go
Normal file
110
internal/notiondesktop/desktop_test.go
Normal file
@ -0,0 +1,110 @@
|
||||
package notiondesktop
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/vincentkoc/notcrawl/internal/store"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
func TestPruneDesktopSnapshotsKeepsNewestAndSidecars(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
names := []string{
|
||||
"notion-desktop-1000.db",
|
||||
"notion-desktop-2000.db",
|
||||
"notion-desktop-3000.db",
|
||||
}
|
||||
for i, name := range names {
|
||||
path := filepath.Join(dir, name)
|
||||
if err := os.WriteFile(path, []byte(name), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, suffix := range []string{"-wal", "-shm"} {
|
||||
if err := os.WriteFile(path+suffix, []byte(suffix), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
modTime := time.Unix(int64(i+1), 0)
|
||||
for _, target := range []string{path, path + "-wal", path + "-shm"} {
|
||||
if err := os.Chtimes(target, modTime, modTime); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current := filepath.Join(dir, "notion-desktop-3000.db")
|
||||
if err := pruneDesktopSnapshots(dir, 2, current); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, name := range []string{"notion-desktop-2000.db", "notion-desktop-3000.db"} {
|
||||
path := filepath.Join(dir, name)
|
||||
for _, target := range []string{path, path + "-wal", path + "-shm"} {
|
||||
if _, err := os.Stat(target); err != nil {
|
||||
t.Fatalf("expected %s to remain: %v", target, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, target := range []string{
|
||||
filepath.Join(dir, "notion-desktop-1000.db"),
|
||||
filepath.Join(dir, "notion-desktop-1000.db-wal"),
|
||||
filepath.Join(dir, "notion-desktop-1000.db-shm"),
|
||||
} {
|
||||
if _, err := os.Stat(target); !os.IsNotExist(err) {
|
||||
t.Fatalf("expected %s to be pruned, got %v", target, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIngestBlocksDerivesUntitledPageFromChildText(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
src, err := sql.Open("sqlite", filepath.Join(t.TempDir(), "desktop.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer src.Close()
|
||||
if _, err := src.ExecContext(ctx, `create table block (
|
||||
id text primary key,
|
||||
space_id text,
|
||||
type text,
|
||||
properties text,
|
||||
content text,
|
||||
collection_id text,
|
||||
created_time integer,
|
||||
last_edited_time integer,
|
||||
parent_id text,
|
||||
parent_table text,
|
||||
alive integer,
|
||||
format text
|
||||
)`); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := src.ExecContext(ctx, `insert into block(id, space_id, type, properties, content, collection_id, created_time, last_edited_time, parent_id, parent_table, alive, format)
|
||||
values
|
||||
('page1', 'space1', 'page', '{}', '', '', 1, 1, '', '', 1, ''),
|
||||
('child1', 'space1', 'text', '{"title":[["Decision log"]]}', '', '', 2, 2, 'page1', 'block', 1, '')`); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
if _, _, _, err := ingestBlocks(ctx, st, src); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
var title string
|
||||
if err := st.DB().QueryRowContext(ctx, `select title from pages where id = 'page1'`).Scan(&title); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if title != "Decision log" {
|
||||
t.Fatalf("expected child text title, got %q", title)
|
||||
}
|
||||
}
|
||||
@ -6,6 +6,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@ -13,6 +14,7 @@ import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/vincentkoc/notcrawl/internal/store"
|
||||
@ -70,28 +72,42 @@ func Publish(ctx context.Context, st *store.Store, opts PublishOptions) (Publish
|
||||
if err := ensureRepo(ctx, opts.RepoPath, opts.Remote, opts.Branch); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "data")); err != nil {
|
||||
dataRoot := filepath.Join(opts.RepoPath, "data")
|
||||
pagesRoot := filepath.Join(opts.RepoPath, "pages")
|
||||
if err := os.MkdirAll(dataRoot, 0o755); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "pages")); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(opts.RepoPath, "data"), 0o755); err != nil {
|
||||
if err := os.MkdirAll(pagesRoot, 0o755); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
manifest := Manifest{GeneratedAt: time.Now().UTC().Format(time.RFC3339)}
|
||||
dataKeep := map[string]bool{}
|
||||
for _, table := range exportTables {
|
||||
tm, err := exportTable(ctx, st.DB(), opts.RepoPath, table)
|
||||
if err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
manifest.Tables = append(manifest.Tables, tm)
|
||||
dataKeep[filepath.Clean(filepath.Join(opts.RepoPath, tm.Path))] = true
|
||||
}
|
||||
if err := pruneGeneratedFiles(dataRoot, dataKeep, func(path string) bool {
|
||||
return strings.HasSuffix(path, ".jsonl.gz")
|
||||
}); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
pagesKeep := map[string]bool{}
|
||||
if opts.MarkdownDir != "" {
|
||||
if err := copyDir(opts.MarkdownDir, filepath.Join(opts.RepoPath, "pages")); err != nil && !os.IsNotExist(err) {
|
||||
var err error
|
||||
pagesKeep, err = copyDir(opts.MarkdownDir, pagesRoot)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
}
|
||||
if err := pruneGeneratedFiles(pagesRoot, pagesKeep, func(path string) bool {
|
||||
return strings.HasSuffix(path, ".md")
|
||||
}); err != nil {
|
||||
return PublishSummary{}, err
|
||||
}
|
||||
b, err := json.MarshalIndent(manifest, "", " ")
|
||||
if err != nil {
|
||||
return PublishSummary{}, err
|
||||
@ -317,15 +333,16 @@ func run(ctx context.Context, dir, name string, args ...string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func copyDir(src, dst string) error {
|
||||
func copyDir(src, dst string) (map[string]bool, error) {
|
||||
info, err := os.Stat(src)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return fmt.Errorf("not a directory: %s", src)
|
||||
return nil, fmt.Errorf("not a directory: %s", src)
|
||||
}
|
||||
return filepath.WalkDir(src, func(path string, d os.DirEntry, err error) error {
|
||||
keep := map[string]bool{}
|
||||
err = filepath.WalkDir(src, func(path string, d os.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -350,9 +367,51 @@ func copyDir(src, dst string) error {
|
||||
return err
|
||||
}
|
||||
defer out.Close()
|
||||
_, err = io.Copy(out, in)
|
||||
return err
|
||||
if _, err := io.Copy(out, in); err != nil {
|
||||
return err
|
||||
}
|
||||
keep[filepath.Clean(target)] = true
|
||||
return nil
|
||||
})
|
||||
return keep, err
|
||||
}
|
||||
|
||||
// pruneGeneratedFiles deletes stale generated files below root and then
// removes any directories that are empty afterwards.
//
// A file is deleted only when shouldPrune reports true for its cleaned path
// and that path is not a key in keep; every other file is left untouched.
// Directories that still contain something are left in place. A root that
// does not exist is not an error.
func pruneGeneratedFiles(root string, keep map[string]bool, shouldPrune func(string) bool) error {
	if _, err := os.Stat(root); err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	var dirs []string
	walkErr := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if path == root {
			return nil
		}
		if d.IsDir() {
			dirs = append(dirs, path)
			return nil
		}
		clean := filepath.Clean(path)
		if shouldPrune(clean) && !keep[clean] {
			return os.Remove(clean)
		}
		return nil
	})
	if walkErr != nil {
		return walkErr
	}
	// A descendant's path is always longer than its ancestor's, so removing
	// longest-first empties children before their parents are attempted.
	sort.Slice(dirs, func(i, j int) bool {
		return len(dirs[i]) > len(dirs[j])
	})
	for _, dir := range dirs {
		err := os.Remove(dir)
		switch {
		case err == nil, os.IsNotExist(err):
			// Removed, or already gone.
		case errors.Is(err, os.ErrExist),
			errors.Is(err, syscall.ENOTEMPTY),
			errors.Is(err, syscall.EEXIST):
			// The directory still has content. os.ErrExist is the portable
			// check: it also matches the Windows "directory not empty"
			// error, which the raw POSIX errno values do not.
		default:
			return err
		}
	}
	return nil
}
|
||||
|
||||
func exportValue(v any) any {
|
||||
|
||||
@ -39,6 +39,35 @@ func TestPublishAndImportSnapshot(t *testing.T) {
|
||||
if _, err := os.Stat(filepath.Join(repo, "pages", "default", "launch-page1.md")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
stalePage := filepath.Join(repo, "pages", "default", "stale.md")
|
||||
if err := os.WriteFile(stalePage, []byte("stale"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pageSidecar := filepath.Join(repo, "pages", "default", "README.txt")
|
||||
if err := os.WriteFile(pageSidecar, []byte("keep"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
staleData := filepath.Join(repo, "data", "stale.jsonl.gz")
|
||||
if err := os.WriteFile(staleData, []byte("stale"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
dataSidecar := filepath.Join(repo, "data", "README.txt")
|
||||
if err := os.WriteFile(dataSidecar, []byte("keep"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if _, err := Publish(ctx, src, PublishOptions{RepoPath: repo, MarkdownDir: mdDir}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, path := range []string{stalePage, staleData} {
|
||||
if _, err := os.Stat(path); !os.IsNotExist(err) {
|
||||
t.Fatalf("expected generated stale file %s to be pruned, got %v", path, err)
|
||||
}
|
||||
}
|
||||
for _, path := range []string{pageSidecar, dataSidecar} {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
t.Fatalf("expected sidecar %s to remain: %v", path, err)
|
||||
}
|
||||
}
|
||||
dst, err := store.Open(filepath.Join(t.TempDir(), "dst.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
||||
@ -197,14 +197,14 @@ func (s *Store) SpaceName(ctx context.Context, id string) (string, error) {
|
||||
err := s.queryRowContext(ctx, `select name from spaces where id = ?`, id).Scan(&name)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return "space-" + shortID(id), nil
|
||||
return fallbackSpaceName(id), nil
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
if name.Valid && name.String != "" {
|
||||
return name.String, nil
|
||||
}
|
||||
return "space-" + shortID(id), nil
|
||||
return fallbackSpaceName(id), nil
|
||||
}
|
||||
|
||||
func (s *Store) TeamName(ctx context.Context, id string) (string, error) {
|
||||
@ -277,3 +277,7 @@ func shortID(id string) string {
|
||||
}
|
||||
return clean
|
||||
}
|
||||
|
||||
func fallbackSpaceName(id string) string {
|
||||
return "External Space " + shortID(id)
|
||||
}
|
||||
|
||||
@ -372,6 +372,47 @@ func (s *Store) UpsertSpace(ctx context.Context, x Space) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *Store) EnsureSpaceFallbacks(ctx context.Context, source string) (int, error) {
|
||||
rows, err := s.queryContext(ctx, `select distinct space_id from (
|
||||
select space_id from pages
|
||||
union all select space_id from blocks
|
||||
union all select space_id from teams
|
||||
union all select space_id from collections
|
||||
union all select space_id from comments
|
||||
union all select space_id from raw_records
|
||||
)
|
||||
where coalesce(space_id, '') <> ''
|
||||
and space_id not in (select id from spaces)`)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var ids []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
if err := rows.Scan(&id); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
now := NowMS()
|
||||
for _, id := range ids {
|
||||
if err := s.UpsertSpace(ctx, Space{
|
||||
ID: id,
|
||||
Name: fallbackSpaceName(id),
|
||||
RawJSON: fmt.Sprintf(`{"id":%q,"inferred":true}`, id),
|
||||
Source: source,
|
||||
SyncedAt: now,
|
||||
}); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
return len(ids), nil
|
||||
}
|
||||
|
||||
func (s *Store) UpsertUser(ctx context.Context, x User) error {
|
||||
_, err := s.execContext(ctx, `insert into users(id, name, email, raw_json, source, synced_at)
|
||||
values (?, ?, ?, ?, ?, ?)
|
||||
@ -614,8 +655,30 @@ func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
}
|
||||
rows, err := s.queryContext(ctx, `select 'page', page_id, title, snippet(page_fts, 2, '[', ']', '...', 16)
|
||||
from page_fts where page_fts match ? limit ?`, q, limit)
|
||||
rows, err := s.queryContext(ctx, `select kind, id, title, text from (
|
||||
select 'page' as kind,
|
||||
page_fts.page_id as id,
|
||||
page_fts.title as title,
|
||||
snippet(page_fts, 2, '[', ']', '...', 16) as text,
|
||||
bm25(page_fts) as rank,
|
||||
coalesce(p.last_edited_time, p.created_time, 0) as edited_at
|
||||
from page_fts
|
||||
join pages p on p.id = page_fts.page_id
|
||||
where page_fts match ?
|
||||
union all
|
||||
select 'comment' as kind,
|
||||
comment_fts.comment_id as id,
|
||||
coalesce(p.title, '') as title,
|
||||
snippet(comment_fts, 2, '[', ']', '...', 16) as text,
|
||||
bm25(comment_fts) as rank,
|
||||
coalesce(c.last_edited_time, c.created_time, 0) as edited_at
|
||||
from comment_fts
|
||||
join comments c on c.id = comment_fts.comment_id
|
||||
left join pages p on p.id = comment_fts.page_id
|
||||
where comment_fts match ?
|
||||
)
|
||||
order by rank, edited_at desc, kind, lower(title), id
|
||||
limit ?`, q, q, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@ -34,6 +34,60 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreSearchRanksByRelevanceThenRecency(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
ctx := context.Background()
|
||||
now := NowMS()
|
||||
pages := []Page{
|
||||
{ID: "old", Title: "Old", LastEditedTime: now - 1000, Alive: true, Source: "test", SyncedAt: now},
|
||||
{ID: "new", Title: "New", LastEditedTime: now, Alive: true, Source: "test", SyncedAt: now},
|
||||
}
|
||||
for _, page := range pages {
|
||||
if err := st.UpsertPage(ctx, page); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := st.UpsertBlock(ctx, Block{ID: page.ID + "-block", PageID: page.ID, Type: "text", Text: "needle", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
results, err := st.Search(ctx, "needle", 10)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(results) < 2 || results[0].ID != "new" || results[1].ID != "old" {
|
||||
t.Fatalf("expected newer equal-rank page first, got %+v", results)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreSearchIncludesComments(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
ctx := context.Background()
|
||||
now := NowMS()
|
||||
if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Launch", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := st.UpsertComment(ctx, Comment{ID: "comment1", PageID: "page1", Text: "needle from a comment", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
results, err := st.Search(ctx, "needle", 10)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(results) != 1 || results[0].Kind != "comment" || results[0].ID != "comment1" || results[0].Title != "Launch" {
|
||||
t.Fatalf("expected comment search result with page title, got %+v", results)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreDefersPageFTSRefresh(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
@ -109,6 +163,42 @@ func TestStoreTransactionCommitsAndRollsBack(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreEnsuresFallbackSpaces(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
ctx := context.Background()
|
||||
now := NowMS()
|
||||
spaceID := "52f1c029-1111-2222-3333-ea9259e0"
|
||||
if err := st.UpsertPage(ctx, Page{ID: "page1", SpaceID: spaceID, Title: "Loose", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
added, err := st.EnsureSpaceFallbacks(ctx, "test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if added != 1 {
|
||||
t.Fatalf("expected one fallback space, got %d", added)
|
||||
}
|
||||
name, err := st.SpaceName(ctx, spaceID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if name != "External Space 52f1c029-ea9259e0" {
|
||||
t.Fatalf("unexpected fallback space name: %q", name)
|
||||
}
|
||||
added, err = st.EnsureSpaceFallbacks(ctx, "test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if added != 0 {
|
||||
t.Fatalf("expected fallback insertion to be idempotent, got %d", added)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStoreOrdersBlocksByDisplayOrder(t *testing.T) {
|
||||
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
|
||||
if err != nil {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user