feat: add Notion database export

This commit is contained in:
Vincent Koc 2026-04-22 21:44:36 -07:00 committed by GitHub
parent b1369ef3f5
commit 0a1d9b1992
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 720 additions and 15 deletions

View File

@ -20,7 +20,9 @@ to without holding Notion credentials.
- local SQLite storage with FTS5
- read-only local desktop cache ingestion from macOS Notion
- official API page/block/user/comment ingestion
- Notion database metadata and row ingestion through the official API
- normalized Markdown export organized by space and page path
- CSV/TSV export for crawled Notion database rows
- compressed JSONL git-share snapshots plus import/update workflows
- read-only SQL access for ad hoc inspection
@ -51,6 +53,8 @@ Or use the official Notion API:
```bash
export NOTION_TOKEN="secret_..."
notcrawl sync --source api
notcrawl databases
notcrawl export-db --database DATABASE_ID --format csv --output roadmap.csv
```
Default paths:
@ -67,6 +71,8 @@ Default paths:
- `doctor` checks config, SQLite, desktop cache, and token presence
- `sync` ingests from `desktop`, `api`, or `all`
- `export-md` renders normalized Markdown files from SQLite
- `databases` lists crawled Notion databases
- `export-db` exports a crawled Notion database to CSV or TSV
- `search` searches page and comment text through FTS5
- `sql` runs read-only SQL against the archive
- `publish` exports SQLite tables and Markdown into a git share repo

24
SPEC.md
View File

@ -23,11 +23,13 @@ V1 scope:
- official Notion API sync
- pages and blocks
- databases/data sources as collections
- database rows as pages linked to their collection
- comments and discussions where available
- users and spaces/workspaces
- FTS5 search over rendered page/comment text
- raw SQL access
- Markdown export
- CSV/TSV export for database rows
- git-backed archive publishing and subscription
Out of scope for V1:
@ -130,3 +132,25 @@ pages/**/*.md
SQLite without requiring Notion credentials.
`update` pulls the latest snapshot and imports it.
## Database Export
API sync discovers databases visible to the integration, stores database
metadata in `collections`, queries each database for row pages, and links those
pages through `pages.collection_id`.
`export-db` renders row properties into delimited text:
```text
notcrawl export-db --database <database-id> --format csv --output rows.csv
notcrawl export-db --database <database-id> --format tsv --output rows.tsv
```
The first columns are stable metadata:
- `page_id`
- `page_title`
- `url`
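Remaining columns come from the database schema (the title property first, then
the other properties in alphabetical order), followed by any row properties that
are missing from the schema, also in alphabetical order.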

View File

@ -16,6 +16,7 @@ import (
"github.com/vincentkoc/notcrawl/internal/notiondesktop"
"github.com/vincentkoc/notcrawl/internal/share"
"github.com/vincentkoc/notcrawl/internal/store"
"github.com/vincentkoc/notcrawl/internal/tableexport"
)
func main() {
@ -69,6 +70,10 @@ func run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
return runSync(ctx, stdout, cfg, cmdArgs)
case "export-md":
return runExportMarkdown(ctx, stdout, cfg)
case "databases":
return runDatabases(ctx, stdout, cfg)
case "export-db":
return runExportDatabase(ctx, stdout, cfg, cmdArgs)
case "search":
return runSearch(ctx, stdout, cfg, cmdArgs)
case "sql":
@ -140,7 +145,7 @@ func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []st
if err != nil {
return err
}
fmt.Fprintf(stdout, "api: users=%d pages=%d blocks=%d comments=%d\n", s.Users, s.Pages, s.Blocks, s.Comments)
fmt.Fprintf(stdout, "api: users=%d pages=%d databases=%d database_rows=%d blocks=%d comments=%d\n", s.Users, s.Pages, s.Databases, s.DatabaseRows, s.Blocks, s.Comments)
case "all":
if cfg.Notion.Desktop.Enabled {
s, err := notiondesktop.Ingest(ctx, st, cfg.Notion.Desktop.Path, cfg.CacheDir)
@ -158,7 +163,7 @@ func runSync(ctx context.Context, stdout io.Writer, cfg config.Config, args []st
if err != nil {
return err
}
fmt.Fprintf(stdout, "api: users=%d pages=%d blocks=%d comments=%d\n", s.Users, s.Pages, s.Blocks, s.Comments)
fmt.Fprintf(stdout, "api: users=%d pages=%d databases=%d database_rows=%d blocks=%d comments=%d\n", s.Users, s.Pages, s.Databases, s.DatabaseRows, s.Blocks, s.Comments)
}
default:
return fmt.Errorf("unknown source %q", *source)
@ -180,6 +185,63 @@ func runExportMarkdown(ctx context.Context, stdout io.Writer, cfg config.Config)
return nil
}
// runDatabases implements the databases command. It opens the archive at
// cfg.DBPath and prints one tab-separated line (id, name, source) per stored
// collection, preceded by a header line.
func runDatabases(ctx context.Context, stdout io.Writer, cfg config.Config) error {
	st, openErr := store.Open(cfg.DBPath)
	if openErr != nil {
		return openErr
	}
	defer st.Close()

	dbs, listErr := st.Collections(ctx)
	if listErr != nil {
		return listErr
	}

	fmt.Fprintln(stdout, "id\tname\tsource")
	for i := range dbs {
		fmt.Fprintf(stdout, "%s\t%s\t%s\n", dbs[i].ID, dbs[i].Name, dbs[i].Source)
	}
	return nil
}
// runExportDatabase implements the export-db command: it writes the rows of one
// crawled database as CSV or TSV to the --output path, or to stdout when
// --output is not given.
//
// The format and the database id are checked before the output file is
// created, so a mistyped flag does not truncate an existing file. When writing
// to a file, the error from closing it is returned as well, because a failed
// close can mean the export was not fully written.
func runExportDatabase(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
	fs := flag.NewFlagSet("export-db", flag.ContinueOnError)
	databaseID := fs.String("database", "", "database id to export")
	format := fs.String("format", "csv", "output format: csv or tsv")
	output := fs.String("output", "", "output file path, defaults to stdout")
	if err := fs.Parse(args); err != nil {
		return err
	}
	if *databaseID == "" {
		return fmt.Errorf("export-db requires --database")
	}

	// Reject unknown formats up front; the exporter accepts the same set.
	exportFormat := tableexport.Format(*format)
	switch exportFormat {
	case "", tableexport.FormatCSV, tableexport.FormatTSV:
	default:
		return fmt.Errorf("unsupported format %q", exportFormat)
	}

	st, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer st.Close()

	// Look the database up before touching the output path: os.Create
	// truncates, and an unknown id should not cost the user an existing file.
	if _, err := st.Collection(ctx, *databaseID); err != nil {
		return err
	}

	exporter := tableexport.Exporter{Store: st}
	if *output == "" {
		_, err := exporter.Export(ctx, *databaseID, exportFormat, stdout)
		return err
	}

	outputPath, err := config.ExpandPath(*output)
	if err != nil {
		return err
	}
	file, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	s, err := exporter.Export(ctx, *databaseID, exportFormat, file)
	// Close exactly once and keep the first error: the export error wins,
	// otherwise a close failure is reported instead of being dropped.
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return err
	}
	fmt.Fprintf(stdout, "exported %d rows and %d columns from %s to %s\n", s.Rows, s.Columns, s.Database, file.Name())
	return nil
}
func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
if len(args) == 0 {
return fmt.Errorf("search query required")
@ -347,6 +409,8 @@ Commands:
sync --source api Ingest through the official Notion API
sync --source all Run enabled sources
export-md Render normalized Markdown from SQLite
databases List crawled Notion databases
export-db --database ID Export a database as CSV or TSV
search QUERY Search page text
sql QUERY Run read-only SQL
publish [--push] Export data and Markdown into a git share repo

View File

@ -25,10 +25,12 @@ type Client struct {
}
// Summary holds the per-kind counters that Sync returns and that the sync
// command prints after an API run.
type Summary struct {
	Users    int
	Pages    int
	Blocks   int
	Comments int
	// Databases is incremented once for every database Sync ingests.
	Databases int
	// DatabaseRows accumulates the row counts returned for those databases.
	DatabaseRows int
}
func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
@ -63,7 +65,7 @@ func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
return s, err
}
for _, page := range pages {
count, comments, err := c.ingestPage(ctx, st, page)
count, comments, err := c.ingestPage(ctx, st, page, ingestPageOptions{FetchBlocks: true, FetchComments: true})
if err != nil {
return s, err
}
@ -71,6 +73,18 @@ func (c Client) Sync(ctx context.Context, st *store.Store) (Summary, error) {
s.Blocks += count
s.Comments += comments
}
databases, err := c.searchDatabases(ctx)
if err != nil {
return s, err
}
for _, database := range databases {
rows, err := c.ingestDatabase(ctx, st, database)
if err != nil {
return s, err
}
s.Databases++
s.DatabaseRows += rows
}
if err := st.SetSyncState(ctx, SourceName, "workspace", "default", time.Now().Format(time.RFC3339)); err != nil {
return s, err
}
@ -128,10 +142,18 @@ func (c Client) listUsers(ctx context.Context) ([]obj, error) {
}
// searchPages delegates to searchObjects with the "page" object filter value.
func (c Client) searchPages(ctx context.Context) ([]obj, error) {
	const objectType = "page"
	return c.searchObjects(ctx, objectType)
}
// searchDatabases delegates to searchObjects with the "database" object filter
// value.
func (c Client) searchDatabases(ctx context.Context) ([]obj, error) {
	const objectType = "database"
	return c.searchObjects(ctx, objectType)
}
func (c Client) searchObjects(ctx context.Context, objectType string) ([]obj, error) {
var out []obj
cursor := ""
for {
body := obj{"page_size": 100, "filter": obj{"property": "object", "value": "page"}}
body := obj{"page_size": 100, "filter": obj{"property": "object", "value": objectType}}
if cursor != "" {
body["start_cursor"] = cursor
}
@ -154,7 +176,13 @@ func (c Client) searchPages(ctx context.Context) ([]obj, error) {
}
}
func (c Client) ingestPage(ctx context.Context, st *store.Store, page obj) (blockCount int, commentCount int, err error) {
// ingestPageOptions tunes what ingestPage does for a single page.
type ingestPageOptions struct {
	// CollectionID, when non-empty, is used as the page's collection id
	// instead of one derived from the page's parent.
	CollectionID string
	// FetchBlocks and FetchComments gate the block walk and the comment
	// ingestion respectively; the zero value skips both.
	FetchBlocks, FetchComments bool
}
func (c Client) ingestPage(ctx context.Context, st *store.Store, page obj, opts ingestPageOptions) (blockCount int, commentCount int, err error) {
raw := notiontext.MarshalRaw(page)
props := marshalAny(page["properties"])
parent := page.mapObj("parent")
@ -162,12 +190,20 @@ func (c Client) ingestPage(ctx context.Context, st *store.Store, page obj) (bloc
if parentID == "" {
parentID = parent.string("database_id")
}
if parentID == "" {
parentID = parent.string("data_source_id")
}
collectionID := opts.CollectionID
if collectionID == "" && (parent.string("type") == "database_id" || parent.string("type") == "data_source_id") {
collectionID = parentID
}
spaceID := parent.string("workspace")
p := store.Page{
ID: page.string("id"),
SpaceID: spaceID,
ParentID: parentID,
ParentTable: parent.string("type"),
CollectionID: collectionID,
Title: titleFromAPIPage(page),
URL: page.string("url"),
PropertiesJSON: props,
@ -184,17 +220,85 @@ func (c Client) ingestPage(ctx context.Context, st *store.Store, page obj) (bloc
if err := st.UpsertPage(ctx, p); err != nil {
return 0, 0, err
}
blocks, err := c.walkBlocks(ctx, st, p.ID, p.ID, p.SpaceID)
if err != nil {
return 0, 0, err
var blocks, comments int
if opts.FetchBlocks {
blocks, err = c.walkBlocks(ctx, st, p.ID, p.ID, p.SpaceID)
if err != nil {
return 0, 0, err
}
}
comments, err := c.ingestComments(ctx, st, p.ID, p.SpaceID)
if err != nil {
return 0, 0, err
if opts.FetchComments {
comments, err = c.ingestComments(ctx, st, p.ID, p.SpaceID)
if err != nil {
return 0, 0, err
}
}
return blocks, comments, nil
}
// ingestDatabase stores one database object as a collection and as a raw
// record, then hands its id to queryDatabase to ingest the rows. It returns
// whatever row count and error queryDatabase reports; a failed upsert returns
// zero rows and the upsert error.
func (c Client) ingestDatabase(ctx context.Context, st *store.Store, database obj) (int, error) {
	var (
		id     = database.string("id")
		raw    = notiontext.MarshalRaw(database)
		parent = database.mapObj("parent")
		title  = notiontext.Plain(database["title"])
	)
	// Fall back to the id so the collection always has a usable name.
	if title == "" {
		title = id
	}

	collection := store.Collection{
		ID:         id,
		SpaceID:    parent.string("workspace"),
		ParentID:   firstNonEmpty(parent.string("page_id"), parent.string("block_id"), parent.string("workspace")),
		Name:       title,
		SchemaJSON: marshalAny(database["properties"]),
		FormatJSON: marshalAny(database),
		RawJSON:    raw,
		Source:     SourceName,
		SyncedAt:   store.NowMS(),
	}
	if err := st.UpsertCollection(ctx, collection); err != nil {
		return 0, err
	}

	record := store.RawRecord{
		Source:      SourceName,
		RecordTable: "database",
		RecordID:    id,
		ParentID:    parent.string("page_id"),
		SpaceID:     parent.string("workspace"),
		RawJSON:     raw,
		SyncedAt:    store.NowMS(),
	}
	if err := st.UpsertRawRecord(ctx, record); err != nil {
		return 0, err
	}

	return c.queryDatabase(ctx, st, id)
}
// queryDatabase pages through POST /databases/{id}/query, 100 results per
// request, and passes every result object to ingestPage with the database id
// as its collection id (blocks and comments are not fetched). It returns the
// number of rows ingested so far together with the first error encountered.
// Paging stops when the response has no truthy has_more or no string
// next_cursor.
func (c Client) queryDatabase(ctx context.Context, st *store.Store, databaseID string) (int, error) {
	endpoint := fmt.Sprintf("/databases/%s/query", url.PathEscape(databaseID))
	rowOpts := ingestPageOptions{CollectionID: databaseID}

	ingested := 0
	next := ""
	for {
		body := obj{"page_size": 100}
		if next != "" {
			body["start_cursor"] = next
		}

		var resp obj
		if err := c.do(ctx, http.MethodPost, endpoint, body, &resp); err != nil {
			return ingested, err
		}

		for _, item := range asSlice(resp["results"]) {
			row, isMap := item.(map[string]any)
			if !isMap {
				// Skip anything that is not a JSON object.
				continue
			}
			if _, _, err := c.ingestPage(ctx, st, obj(row), rowOpts); err != nil {
				return ingested, err
			}
			ingested++
		}

		if !truthy(resp["has_more"]) {
			return ingested, nil
		}
		if next, _ = resp["next_cursor"].(string); next == "" {
			return ingested, nil
		}
	}
}
func (c Client) walkBlocks(ctx context.Context, st *store.Store, pageID, parentID, spaceID string) (int, error) {
var count int
cursor := ""
@ -405,3 +509,12 @@ func asSlice(v any) []any {
}
return nil
}
// firstNonEmpty returns the first argument that is not the empty string, or ""
// when every argument is empty or none are given.
func firstNonEmpty(values ...string) string {
	for i := 0; i < len(values); i++ {
		if candidate := values[i]; len(candidate) > 0 {
			return candidate
		}
	}
	return ""
}

View File

@ -0,0 +1,99 @@
package notionapi
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"path/filepath"
"testing"
"github.com/vincentkoc/notcrawl/internal/store"
)
// TestSyncIngestsDatabasesAndRows runs Sync against a stub API server that
// exposes no users or pages, one database ("db1"), and one row in that
// database. It checks the summary counters, the stored collection, and that
// the row is linked to the collection.
func TestSyncIngestsDatabasesAndRows(t *testing.T) {
	// reject reports an unexpected request. Handlers run on the server's
	// goroutines, where t.Fatal/t.FailNow must not be called, so the failure
	// is recorded with t.Errorf and the request is answered with a 500.
	reject := func(w http.ResponseWriter, format string, args ...any) {
		t.Errorf(format, args...)
		http.Error(w, "unexpected request", http.StatusInternalServerError)
	}
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		switch r.URL.Path {
		case "/users":
			_, _ = w.Write([]byte(`{"object":"list","results":[],"has_more":false}`))
		case "/search":
			var body map[string]any
			if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
				reject(w, "decode search body: %v", err)
				return
			}
			// A missing or malformed filter yields a nil map, which falls
			// through to the default case instead of panicking.
			filter, _ := body["filter"].(map[string]any)
			switch filter["value"] {
			case "page":
				_, _ = w.Write([]byte(`{"object":"list","results":[],"has_more":false}`))
			case "database":
				_, _ = w.Write([]byte(`{
"object":"list",
"results":[{
"object":"database",
"id":"db1",
"title":[{"plain_text":"Roadmap"}],
"parent":{"type":"workspace","workspace":true},
"properties":{
"Name":{"id":"title","type":"title","title":{}},
"Status":{"id":"status","type":"select","select":{}}
}
}],
"has_more":false
}`))
			default:
				reject(w, "unexpected search filter: %v", filter["value"])
			}
		case "/databases/db1/query":
			_, _ = w.Write([]byte(`{
"object":"list",
"results":[{
"object":"page",
"id":"page1",
"created_time":"2026-01-01T00:00:00Z",
"last_edited_time":"2026-01-02T00:00:00Z",
"archived":false,
"in_trash":false,
"url":"https://notion.so/page1",
"parent":{"type":"database_id","database_id":"db1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
"has_more":false
}`))
		default:
			reject(w, "unexpected request: %s %s", r.Method, r.URL.String())
		}
	}))
	defer server.Close()

	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	summary, err := (Client{BaseURL: server.URL, Version: "2022-06-28", Token: "secret"}).Sync(context.Background(), st)
	if err != nil {
		t.Fatal(err)
	}
	if summary.Databases != 1 || summary.DatabaseRows != 1 {
		t.Fatalf("unexpected summary: %+v", summary)
	}

	collections, err := st.Collections(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if len(collections) != 1 || collections[0].ID != "db1" || collections[0].Name != "Roadmap" {
		t.Fatalf("unexpected collections: %+v", collections)
	}

	rows, err := st.CollectionPages(context.Background(), "db1")
	if err != nil {
		t.Fatal(err)
	}
	if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "db1" {
		t.Fatalf("unexpected rows: %+v", rows)
	}
}

View File

@ -27,6 +27,53 @@ func (s *Store) Pages(ctx context.Context) ([]Page, error) {
return pages, rows.Err()
}
// Collections returns every row of the collections table, ordered
// case-insensitively by name (falling back to the id when the name is NULL)
// and then by id.
func (s *Store) Collections(ctx context.Context) ([]Collection, error) {
	const query = `select id, space_id, parent_id, name, schema_json, format_json, raw_json, source, synced_at
from collections order by lower(coalesce(name, id)), id`

	rows, err := s.db.QueryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []Collection
	for rows.Next() {
		var item Collection
		scanErr := rows.Scan(&item.ID, &item.SpaceID, &item.ParentID, &item.Name, &item.SchemaJSON,
			&item.FormatJSON, &item.RawJSON, &item.Source, &item.SyncedAt)
		if scanErr != nil {
			return nil, scanErr
		}
		result = append(result, item)
	}
	// rows.Err surfaces an iteration failure that ended the loop early.
	return result, rows.Err()
}
// Collection loads the collections row whose id equals id. The Scan error is
// returned unchanged (presumably sql.ErrNoRows when no such row exists —
// confirm against the type of s.db).
func (s *Store) Collection(ctx context.Context, id string) (Collection, error) {
	const query = `select id, space_id, parent_id, name, schema_json, format_json, raw_json, source, synced_at
from collections where id = ?`

	var found Collection
	row := s.db.QueryRowContext(ctx, query, id)
	err := row.Scan(&found.ID, &found.SpaceID, &found.ParentID, &found.Name, &found.SchemaJSON,
		&found.FormatJSON, &found.RawJSON, &found.Source, &found.SyncedAt)
	return found, err
}
// CollectionPages returns the pages whose collection_id equals collectionID and
// whose alive flag is 1, most recently edited first (a NULL edit time sorts as
// 0), with title as the tie-breaker.
func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Page, error) {
	const query = `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
properties_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from pages where collection_id = ? and alive = 1 order by coalesce(last_edited_time, 0) desc, title`

	rows, err := s.db.QueryContext(ctx, query, collectionID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []Page
	for rows.Next() {
		var (
			page      Page
			aliveFlag int
		)
		scanErr := rows.Scan(&page.ID, &page.SpaceID, &page.ParentID, &page.ParentTable, &page.CollectionID,
			&page.Title, &page.URL, &page.Icon, &page.Cover, &page.PropertiesJSON, &page.CreatedTime,
			&page.LastEditedTime, &aliveFlag, &page.Source, &page.RawJSON, &page.SyncedAt)
		if scanErr != nil {
			return nil, scanErr
		}
		// alive is stored as an integer column; convert it for the struct.
		page.Alive = IntBool(aliveFlag)
		result = append(result, page)
	}
	return result, rows.Err()
}
func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error) {
rows, err := s.db.QueryContext(ctx, `select id, page_id, space_id, parent_id, parent_table, type, text, properties_json,
content_json, format_json, created_time, last_edited_time, alive, source, raw_json, synced_at

View File

@ -0,0 +1,305 @@
package tableexport
import (
"context"
"encoding/csv"
"encoding/json"
"fmt"
"io"
"sort"
"strconv"
"strings"
"github.com/vincentkoc/notcrawl/internal/notiontext"
"github.com/vincentkoc/notcrawl/internal/store"
)
// Format names a delimited-text output format accepted by Exporter.Export.
type Format string

// FormatCSV selects comma-separated output.
const FormatCSV Format = "csv"

// FormatTSV selects tab-separated output.
const FormatTSV Format = "tsv"
// Exporter writes the rows of a stored database as delimited text.
type Exporter struct {
// Store is the archive Export reads the collection and its pages from.
// Export returns an error when it is nil.
Store *store.Store
}
// Summary describes the result of a successful Export call.
type Summary struct {
	// Database is the id of the exported collection.
	Database string
	// Rows is the number of pages written (the header line is not counted);
	// Columns is the number of columns in each line.
	Rows, Columns int
}
// Export writes the pages of the database identified by databaseID to w as
// delimited text: one header line followed by one line per page.
//
// format selects the delimiter: FormatTSV uses tabs, FormatCSV or the empty
// string uses commas. Any other value is rejected before the store is
// queried. The column set comes from columnsFor; page_id, page_title and url
// are filled from the page itself, every other column from the page's
// properties JSON.
//
// On success the returned Summary carries the collection id and the number of
// rows and columns written. On failure the Summary is zero and store errors
// are wrapped with the database id (errors.Is/As still see the cause). Output
// may already have been partially written to w when an error is returned.
func (e Exporter) Export(ctx context.Context, databaseID string, format Format, w io.Writer) (Summary, error) {
	if e.Store == nil {
		return Summary{}, fmt.Errorf("missing store")
	}
	if databaseID == "" {
		return Summary{}, fmt.Errorf("database id is required")
	}

	// Validate the cheap argument first so a bad format never costs a query.
	delimiter := ','
	switch format {
	case "", FormatCSV:
	case FormatTSV:
		delimiter = '\t'
	default:
		return Summary{}, fmt.Errorf("unsupported format %q", format)
	}

	collection, err := e.Store.Collection(ctx, databaseID)
	if err != nil {
		return Summary{}, fmt.Errorf("load database %q: %w", databaseID, err)
	}
	pages, err := e.Store.CollectionPages(ctx, databaseID)
	if err != nil {
		return Summary{}, fmt.Errorf("load rows of database %q: %w", databaseID, err)
	}

	columns := columnsFor(collection, pages)
	writer := csv.NewWriter(w)
	writer.Comma = delimiter
	if err := writer.Write(columns); err != nil {
		return Summary{}, err
	}
	for _, page := range pages {
		props := decodeMap(page.PropertiesJSON)
		row := make([]string, 0, len(columns))
		for _, col := range columns {
			switch col {
			case "page_id":
				row = append(row, page.ID)
			case "page_title":
				row = append(row, page.Title)
			case "url":
				row = append(row, page.URL)
			default:
				row = append(row, propertyValueText(props[col]))
			}
		}
		if err := writer.Write(row); err != nil {
			return Summary{}, err
		}
	}

	// csv.Writer buffers; Flush pushes the data out and Error reports any
	// write failure that happened along the way.
	writer.Flush()
	if err := writer.Error(); err != nil {
		return Summary{}, err
	}
	return Summary{Database: collection.ID, Rows: len(pages), Columns: len(columns)}, nil
}
// columnsFor builds the export header. It starts with the fixed page_id,
// page_title and url columns, adds the schema property names in the order
// schemaPropertyNames returns them, and finally appends, sorted, any property
// names that occur on pages but not in the schema. A name is never listed
// twice, so a property that shares a name with a fixed column is not added.
func columnsFor(collection store.Collection, pages []store.Page) []string {
	cols := []string{"page_id", "page_title", "url"}
	seen := make(map[string]bool, len(cols))
	for _, fixed := range cols {
		seen[fixed] = true
	}

	for _, name := range schemaPropertyNames(collection.SchemaJSON) {
		if seen[name] {
			continue
		}
		seen[name] = true
		cols = append(cols, name)
	}

	var extras []string
	for i := range pages {
		for name := range decodeMap(pages[i].PropertiesJSON) {
			if seen[name] {
				continue
			}
			seen[name] = true
			extras = append(extras, name)
		}
	}
	// Map iteration order is random; sorting keeps the header stable.
	sort.Strings(extras)
	return append(cols, extras...)
}
// schemaPropertyNames decodes a schema JSON object and returns its keys:
// first the properties whose "type" is "title", then all others, each group
// sorted alphabetically.
func schemaPropertyNames(raw string) []string {
	var titles, others []string
	for name, def := range decodeMap(raw) {
		if m, isMap := def.(map[string]any); isMap && m["type"] == "title" {
			titles = append(titles, name)
		} else {
			others = append(others, name)
		}
	}
	sort.Strings(titles)
	sort.Strings(others)
	return append(titles, others...)
}
// decodeMap decodes raw as a JSON object into a generic map. Blank input
// yields an empty map, and decode errors are deliberately ignored so callers
// simply see whatever json.Unmarshal left in the map.
func decodeMap(raw string) map[string]any {
	decoded := make(map[string]any)
	if trimmed := strings.TrimSpace(raw); trimmed != "" {
		// Best effort: malformed properties must not abort an export.
		_ = json.Unmarshal([]byte(raw), &decoded)
	}
	return decoded
}
// propertyValueText renders one property value as plain text for a cell. The
// value's "type" field picks the renderer; values that are not objects, have
// no string type, have an unhandled type, or hold an unexpected payload for
// checkbox and the plain string types fall back to notiontext.Plain on the
// whole value.
func propertyValueText(v any) string {
	// A failed assertion leaves prop nil; reading a nil map is safe and
	// yields an empty kind, which matches no case below.
	prop, _ := v.(map[string]any)
	kind, _ := prop["type"].(string)

	switch kind {
	case "title", "rich_text":
		return notiontext.Plain(prop[kind])
	case "number":
		return numberText(prop[kind])
	case "select", "status", "created_by", "last_edited_by":
		return namedObject(prop[kind])
	case "multi_select", "people", "files":
		return joinNamed(prop[kind])
	case "date":
		return dateText(prop[kind])
	case "relation":
		return joinIDs(prop[kind])
	case "formula":
		return formulaText(prop[kind])
	case "rollup":
		return rollupText(prop[kind])
	case "unique_id":
		return uniqueIDText(prop[kind])
	case "checkbox":
		if checked, isBool := prop[kind].(bool); isBool {
			return strconv.FormatBool(checked)
		}
	case "url", "email", "phone_number", "created_time", "last_edited_time":
		if text, isString := prop[kind].(string); isString {
			return text
		}
	}
	return notiontext.Plain(v)
}
// namedObject renders an object by its "name" string or, failing that, its
// "id" string. Objects with neither fall back to notiontext.Plain; values that
// are not objects render as "".
func namedObject(v any) string {
	fields, isMap := v.(map[string]any)
	if !isMap {
		return ""
	}
	for _, key := range [...]string{"name", "id"} {
		if text, isString := fields[key].(string); isString {
			return text
		}
	}
	return notiontext.Plain(v)
}
// joinNamed renders each element of a list with namedObject and joins the
// non-empty results with ", ". Anything that is not a list renders as "".
func joinNamed(v any) string {
	list, isList := v.([]any)
	if !isList {
		return ""
	}
	var joined strings.Builder
	for _, entry := range list {
		label := namedObject(entry)
		if label == "" {
			continue
		}
		// Labels are never empty here, so a non-empty builder means at
		// least one label precedes this one.
		if joined.Len() > 0 {
			joined.WriteString(", ")
		}
		joined.WriteString(label)
	}
	return joined.String()
}
// joinIDs collects the string "id" of every object in a list and joins them
// with ", ". Elements that are not objects or have no string id are skipped;
// anything that is not a list renders as "".
func joinIDs(v any) string {
	refs, isList := v.([]any)
	if !isList {
		return ""
	}
	ids := make([]string, 0, len(refs))
	for i := range refs {
		ref, isMap := refs[i].(map[string]any)
		if !isMap {
			continue
		}
		id, isString := ref["id"].(string)
		if isString {
			ids = append(ids, id)
		}
	}
	return strings.Join(ids, ", ")
}
// dateText renders a date object as its "start" string, or as "start/end"
// when a non-empty "end" string is present. Values that are not objects render
// as "".
func dateText(v any) string {
	span, isMap := v.(map[string]any)
	if !isMap {
		return ""
	}
	start, _ := span["start"].(string)
	if end, _ := span["end"].(string); end != "" {
		return start + "/" + end
	}
	return start
}
// formulaText renders a formula result according to its "type": string,
// number, boolean or date. A boolean result that is not a bool, and any other
// type, fall back to notiontext.Plain; values that are not objects render as
// "".
func formulaText(v any) string {
	result, isMap := v.(map[string]any)
	if !isMap {
		return ""
	}
	kind, _ := result["type"].(string)
	switch kind {
	case "string":
		text, _ := result[kind].(string)
		return text
	case "number":
		return numberText(result[kind])
	case "date":
		return dateText(result[kind])
	case "boolean":
		if flag, isBool := result[kind].(bool); isBool {
			return strconv.FormatBool(flag)
		}
	}
	return notiontext.Plain(v)
}
// rollupText renders a rollup result according to its "type". Numbers and
// dates use their dedicated renderers; arrays render every element with
// propertyValueText and join the non-empty results with ", ". Other types fall
// back to notiontext.Plain; values that are not objects render as "".
func rollupText(v any) string {
	result, isMap := v.(map[string]any)
	if !isMap {
		return ""
	}
	kind, _ := result["type"].(string)
	switch kind {
	case "number":
		return numberText(result[kind])
	case "date":
		return dateText(result[kind])
	case "array":
		elements, _ := result[kind].([]any)
		var joined strings.Builder
		for _, element := range elements {
			text := propertyValueText(element)
			if text == "" {
				continue
			}
			// Only non-empty texts are written, so a non-empty builder
			// means a separator is due.
			if joined.Len() > 0 {
				joined.WriteString(", ")
			}
			joined.WriteString(text)
		}
		return joined.String()
	}
	return notiontext.Plain(v)
}
// uniqueIDText renders a unique_id value from its "prefix" and "number"
// fields. When both are present they are joined with a hyphen ("TASK-12"),
// the form Notion itself displays; when either is missing the other is
// returned on its own. Values that are not objects render as "".
func uniqueIDText(v any) string {
	m, ok := v.(map[string]any)
	if !ok {
		return ""
	}
	// prefix may be absent or null, in which case the assertion yields "".
	prefix, _ := m["prefix"].(string)
	number := numberText(m["number"])
	if prefix == "" || number == "" {
		return prefix + number
	}
	return prefix + "-" + number
}
// numberText renders a decoded JSON number as text. nil becomes "", float64
// is printed in plain decimal with the shortest exact representation, int and
// json.Number use their natural form, and any other value goes through
// fmt.Sprint.
func numberText(v any) string {
	if v == nil {
		return ""
	}
	switch n := v.(type) {
	case float64:
		// 'f' with precision -1 avoids exponent notation and trailing zeros.
		return strconv.FormatFloat(n, 'f', -1, 64)
	case int:
		return strconv.Itoa(n)
	case json.Number:
		return n.String()
	}
	return fmt.Sprint(v)
}

View File

@ -0,0 +1,47 @@
package tableexport
import (
"bytes"
"context"
"path/filepath"
"strings"
"testing"
"github.com/vincentkoc/notcrawl/internal/store"
)
// TestExportDatabaseTSV stores one collection with a three-property schema and
// one page, exports it as TSV, and checks that the output contains the
// expected header line and data line.
func TestExportDatabaseTSV(t *testing.T) {
	const (
		schemaJSON = `{"Name":{"type":"title"},"Status":{"type":"select"},"Score":{"type":"number"}}`
		rowJSON    = `{"Name":{"type":"title","title":[{"plain_text":"Ship"}]},"Status":{"type":"select","select":{"name":"Done"}},"Score":{"type":"number","number":7}}`
		wantHeader = "page_id\tpage_title\turl\tName\tScore\tStatus"
		wantRow    = "page1\tShip\thttps://example.com/ship\tShip\t7\tDone"
	)

	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "notcrawl.db")
	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	syncedAt := store.NowMS()
	database := store.Collection{
		ID:         "db1",
		Name:       "Roadmap",
		Source:     "test",
		SyncedAt:   syncedAt,
		SchemaJSON: schemaJSON,
	}
	if err := st.UpsertCollection(ctx, database); err != nil {
		t.Fatal(err)
	}
	row := store.Page{
		ID:             "page1",
		CollectionID:   "db1",
		Title:          "Ship",
		URL:            "https://example.com/ship",
		Alive:          true,
		Source:         "test",
		SyncedAt:       syncedAt,
		PropertiesJSON: rowJSON,
	}
	if err := st.UpsertPage(ctx, row); err != nil {
		t.Fatal(err)
	}

	var out bytes.Buffer
	s, err := (Exporter{Store: st}).Export(ctx, "db1", FormatTSV, &out)
	if err != nil {
		t.Fatal(err)
	}
	if s.Rows != 1 {
		t.Fatalf("expected one row, got %d", s.Rows)
	}

	got := out.String()
	for _, want := range []string{wantHeader, wantRow} {
		if !strings.Contains(got, want) {
			t.Fatalf("missing %q in:\n%s", want, got)
		}
	}
}