diff --git a/cmd/notcrawl/main.go b/cmd/notcrawl/main.go index d326e28..659fb70 100644 --- a/cmd/notcrawl/main.go +++ b/cmd/notcrawl/main.go @@ -327,11 +327,15 @@ func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args [] return err } for _, r := range results { - fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", r.Kind, r.ID, r.Title, r.Text) + fmt.Fprintf(stdout, "%s\t%s\t%s\t%s\n", searchField(r.Kind), searchField(r.ID), searchField(r.Title), searchField(r.Text)) } return nil } +func searchField(s string) string { + return strings.Join(strings.Fields(s), " ") +} + func runSQL(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error { if len(args) == 0 { return fmt.Errorf("sql query required") diff --git a/cmd/notcrawl/main_test.go b/cmd/notcrawl/main_test.go new file mode 100644 index 0000000..632ff48 --- /dev/null +++ b/cmd/notcrawl/main_test.go @@ -0,0 +1,10 @@ +package main + +import "testing" + +func TestSearchFieldCollapsesRecordSeparators(t *testing.T) { + got := searchField("line one\nline\ttwo line three") + if got != "line one line two line three" { + t.Fatalf("unexpected field: %q", got) + } +} diff --git a/internal/store/store.go b/internal/store/store.go index 219c74b..434adcf 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -655,8 +655,30 @@ func (s *Store) Search(ctx context.Context, q string, limit int) ([]SearchResult if limit <= 0 { limit = 20 } - rows, err := s.queryContext(ctx, `select 'page', page_id, title, snippet(page_fts, 2, '[', ']', '...', 16) - from page_fts where page_fts match ? limit ?`, q, limit) + rows, err := s.queryContext(ctx, `select kind, id, title, text from ( + select 'page' as kind, + page_fts.page_id as id, + page_fts.title as title, + snippet(page_fts, 2, '[', ']', '...', 16) as text, + bm25(page_fts) as rank, + coalesce(p.last_edited_time, p.created_time, 0) as edited_at + from page_fts + join pages p on p.id = page_fts.page_id + where page_fts match ? + union all + select 'comment' as kind, + comment_fts.comment_id as id, + coalesce(p.title, '') as title, + snippet(comment_fts, 2, '[', ']', '...', 16) as text, + bm25(comment_fts) as rank, + coalesce(c.last_edited_time, c.created_time, 0) as edited_at + from comment_fts + join comments c on c.id = comment_fts.comment_id + left join pages p on p.id = comment_fts.page_id + where comment_fts match ? + ) + order by rank, edited_at desc, kind, lower(title), id + limit ?`, q, q, limit) if err != nil { return nil, err } diff --git a/internal/store/store_test.go b/internal/store/store_test.go index f0f0fd2..e68977e 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -34,6 +34,60 @@ func TestStoreUpsertsAndSearchesPage(t *testing.T) { } } +func TestStoreSearchRanksByRelevanceThenRecency(t *testing.T) { + st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db")) + if err != nil { + t.Fatal(err) + } + defer st.Close() + ctx := context.Background() + now := NowMS() + pages := []Page{ + {ID: "old", Title: "Old", LastEditedTime: now - 1000, Alive: true, Source: "test", SyncedAt: now}, + {ID: "new", Title: "New", LastEditedTime: now, Alive: true, Source: "test", SyncedAt: now}, + } + for _, page := range pages { + if err := st.UpsertPage(ctx, page); err != nil { + t.Fatal(err) + } + if err := st.UpsertBlock(ctx, Block{ID: page.ID + "-block", PageID: page.ID, Type: "text", Text: "needle", Alive: true, Source: "test", SyncedAt: now}); err != nil { + t.Fatal(err) + } + } + + results, err := st.Search(ctx, "needle", 10) + if err != nil { + t.Fatal(err) + } + if len(results) < 2 || results[0].ID != "new" || results[1].ID != "old" { + t.Fatalf("expected newer equal-rank page first, got %+v", results) + } +} + +func TestStoreSearchIncludesComments(t *testing.T) { + st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db")) + if err != nil { + t.Fatal(err) + } + defer st.Close() + ctx := context.Background() + now := NowMS() + if err := st.UpsertPage(ctx, Page{ID: "page1", Title: "Launch", Alive: true, Source: "test", SyncedAt: now}); err != nil { + t.Fatal(err) + } + if err := st.UpsertComment(ctx, Comment{ID: "comment1", PageID: "page1", Text: "needle from a comment", Alive: true, Source: "test", SyncedAt: now}); err != nil { + t.Fatal(err) + } + + results, err := st.Search(ctx, "needle", 10) + if err != nil { + t.Fatal(err) + } + if len(results) != 1 || results[0].Kind != "comment" || results[0].ID != "comment1" || results[0].Title != "Launch" { + t.Fatalf("expected comment search result with page title, got %+v", results) + } +} + func TestStoreDefersPageFTSRefresh(t *testing.T) { st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db")) if err != nil {