Compare commits

...

9 Commits

Author SHA1 Message Date
Vincent Koc
e280c7f350
fix(release): generate auditable Homebrew formula
Some checks failed
Release Drafter / autolabel (push) Has been cancelled
Release Drafter / Update Release Draft (push) Has been cancelled
Validation / validate (push) Has been cancelled
2026-04-29 13:06:57 -07:00
Vincent Koc
b687296e43
fix(release): validate Homebrew tap push token 2026-04-29 13:02:21 -07:00
Vincent Koc
a88136abad
fix(release): publish formula to Homebrew tap (#26)
Fixes #25
2026-04-29 12:27:26 -07:00
Vincent Koc
fc30106579
fix(export): clean emoji from filenames (#24) 2026-04-29 05:13:19 -07:00
Vincent Koc
bd4b1977b6
fix(export): render desktop database CSVs correctly (#23) 2026-04-29 05:03:43 -07:00
Vincent Koc
ea691828c6
fix(export): add bulk database export (#22) 2026-04-29 04:32:52 -07:00
Vincent Koc
2dcf13a432
fix(api): retry transient Notion failures (#21) 2026-04-29 04:17:02 -07:00
Vincent Koc
091db7b53d
fix(api): dedupe Notion rich text titles (#20)
Fixes #19
2026-04-29 04:08:34 -07:00
Vincent Koc
d233112b04
ci(release): update goreleaser action
Update GoReleaser action to v7 for the release workflow.
2026-04-27 14:15:30 -07:00
14 changed files with 720 additions and 107 deletions

View File

@ -27,19 +27,27 @@ jobs:
update-tap:
runs-on: ubuntu-latest
if: startsWith(github.event.release.tag_name || inputs.tag_name, 'v')
env:
TAP_REPO: ${{ vars.HOMEBREW_TAP_REPO || 'vincentkoc/homebrew-tap' }}
steps:
- name: Validate tap configuration
env:
GH_TOKEN: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}
run: |
set -euo pipefail
if [ -z "${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}" ]; then
if [ -z "${GH_TOKEN}" ]; then
echo "Secret HOMEBREW_TAP_GITHUB_TOKEN is required."
exit 1
fi
if [ "$(gh api "repos/${TAP_REPO}" --jq '.permissions.push // false')" != "true" ]; then
echo "HOMEBREW_TAP_GITHUB_TOKEN must have push access to ${TAP_REPO}."
exit 1
fi
- name: Checkout tap repository
uses: actions/checkout@v5
with:
repository: ${{ vars.HOMEBREW_TAP_REPO || 'vincentkoc/tap' }}
repository: ${{ env.TAP_REPO }}
token: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}
- name: Update formula
@ -48,7 +56,6 @@ jobs:
SOURCE_REPO: ${{ github.repository }}
run: |
set -euo pipefail
VERSION="${TAG#v}"
SOURCE_URL="https://github.com/${SOURCE_REPO}/archive/refs/tags/${TAG}.tar.gz"
curl -fsSL "${SOURCE_URL}" -o /tmp/notcrawl-src.tar.gz
@ -62,7 +69,6 @@ jobs:
url "${SOURCE_URL}"
sha256 "${SHA256}"
license "MIT"
version "${VERSION}"
depends_on "go" => :build

View File

@ -50,7 +50,7 @@ jobs:
echo "RELEASE_VERSION=${TAG#v}" >> "$GITHUB_ENV"
- name: Build release artifacts
uses: goreleaser/goreleaser-action@v6
uses: goreleaser/goreleaser-action@v7
with:
distribution: goreleaser
version: "~> v2"

View File

@ -59,6 +59,7 @@ export NOTION_TOKEN="secret_..."
notcrawl sync --source api
notcrawl databases
notcrawl export-db --database DATABASE_ID --format csv --output roadmap.csv
notcrawl export-db --all --dir exports/csv
```
Default paths:
@ -79,7 +80,7 @@ Default paths:
- `sync` ingests from `desktop`, `api`, or `all`
- `export-md` renders normalized Markdown files from SQLite
- `databases` lists crawled Notion databases
- `export-db` exports a crawled Notion database to CSV or TSV
- `export-db` exports one crawled Notion database, or all databases with `--all --dir`, to CSV or TSV
- `search` searches page and comment text through FTS5
- `sql` runs read-only SQL against the archive
- `publish` exports SQLite tables and Markdown into a git share repo

View File

@ -163,6 +163,7 @@ those pages through `pages.collection_id`.
```text
notcrawl export-db --database <database-id> --format csv --output rows.csv
notcrawl export-db --database <database-id> --format tsv --output rows.tsv
notcrawl export-db --all --dir exports/csv
```
The first columns are stable metadata:

View File

@ -8,12 +8,14 @@ import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/vincentkoc/notcrawl/internal/config"
"github.com/vincentkoc/notcrawl/internal/markdown"
"github.com/vincentkoc/notcrawl/internal/notionapi"
"github.com/vincentkoc/notcrawl/internal/notiondesktop"
"github.com/vincentkoc/notcrawl/internal/notiontext"
"github.com/vincentkoc/notcrawl/internal/report"
"github.com/vincentkoc/notcrawl/internal/share"
"github.com/vincentkoc/notcrawl/internal/store"
@ -276,11 +278,25 @@ func runDatabases(ctx context.Context, stdout io.Writer, cfg config.Config) erro
func runExportDatabase(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
fs := flag.NewFlagSet("export-db", flag.ContinueOnError)
databaseID := fs.String("database", "", "database id to export")
all := fs.Bool("all", false, "export every crawled database")
dir := fs.String("dir", "", "directory for --all exports")
format := fs.String("format", "csv", "output format: csv or tsv")
output := fs.String("output", "", "output file path, defaults to stdout")
if err := fs.Parse(args); err != nil {
return err
}
if *all {
if *databaseID != "" {
return fmt.Errorf("export-db cannot combine --all and --database")
}
if *output != "" {
return fmt.Errorf("export-db cannot combine --all and --output")
}
if *dir == "" {
return fmt.Errorf("export-db --all requires --dir")
}
return runExportAllDatabases(ctx, stdout, cfg, tableexport.Format(*format), *dir)
}
if *databaseID == "" {
return fmt.Errorf("export-db requires --database")
}
@ -313,6 +329,89 @@ func runExportDatabase(ctx context.Context, stdout io.Writer, cfg config.Config,
return nil
}
// runExportAllDatabases exports every collection returned by the store into
// its own file under dir and writes an index.tsv manifest next to them.
//
// The manifest has one header line followed by one row per exported database:
// id, name, source, row count, column count, and file name. On success a
// one-line summary is printed to stdout. The first error encountered aborts
// the export; files written before the failure are left in place.
func runExportAllDatabases(ctx context.Context, stdout io.Writer, cfg config.Config, format tableexport.Format, dir string) error {
	ext, err := exportExtension(format)
	if err != nil {
		return err
	}
	dir, err = config.ExpandPath(dir)
	if err != nil {
		return err
	}
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}
	st, err := store.Open(cfg.DBPath)
	if err != nil {
		return err
	}
	defer st.Close()
	collections, err := st.Collections(ctx)
	if err != nil {
		return err
	}
	index, err := os.Create(filepath.Join(dir, "index.tsv"))
	if err != nil {
		return err
	}
	// Safety net for early returns. The success path closes explicitly below
	// and checks the error; the second Close from this defer is then a no-op
	// failure that is deliberately ignored.
	defer index.Close()
	if _, err := fmt.Fprintln(index, "id\tname\tsource\trows\tcolumns\tfile"); err != nil {
		return err
	}
	// Database names are free text; a literal tab or line break would split
	// the manifest row, so they are flattened to spaces.
	flatten := strings.NewReplacer("\t", " ", "\r", " ", "\n", " ")
	exporter := tableexport.Exporter{Store: st}
	used := map[string]bool{}
	var databases, rows int
	for _, collection := range collections {
		name := exportDatabaseFilename(collection, ext, used)
		file, err := os.Create(filepath.Join(dir, name))
		if err != nil {
			return err
		}
		s, exportErr := exporter.Export(ctx, collection.ID, format, file)
		// Always close the data file, but report the export error first
		// because it is the more specific failure.
		closeErr := file.Close()
		if exportErr != nil {
			return exportErr
		}
		if closeErr != nil {
			return closeErr
		}
		databases++
		rows += s.Rows
		if _, err := fmt.Fprintf(index, "%s\t%s\t%s\t%d\t%d\t%s\n", collection.ID, flatten.Replace(collection.Name), collection.Source, s.Rows, s.Columns, name); err != nil {
			return err
		}
	}
	if err := index.Close(); err != nil {
		return err
	}
	fmt.Fprintf(stdout, "exported %d databases and %d rows to %s\n", databases, rows, dir)
	return nil
}
// exportExtension returns the file extension for an export format. The empty
// format is treated as CSV; any format other than CSV or TSV is an error.
func exportExtension(format tableexport.Format) (string, error) {
	if format == "" || format == tableexport.FormatCSV {
		return "csv", nil
	}
	if format == tableexport.FormatTSV {
		return "tsv", nil
	}
	return "", fmt.Errorf("unsupported format %q", format)
}
// exportDatabaseFilename builds a file name of the form
// "<slug>-<short id>.<ext>" for a collection, falling back to the collection
// ID when the name is blank. Names already present in used get a numeric
// suffix starting at 2; the chosen name is recorded in used before returning.
func exportDatabaseFilename(collection store.Collection, ext string, used map[string]bool) string {
	label := collection.Name
	if strings.TrimSpace(label) == "" {
		label = collection.ID
	}
	stem := notiontext.Slug(label) + "-" + notiontext.ShortID(collection.ID)
	candidate := stem + "." + ext
	suffix := 2
	for used[candidate] {
		candidate = fmt.Sprintf("%s-%d.%s", stem, suffix, ext)
		suffix++
	}
	used[candidate] = true
	return candidate
}
func runSearch(ctx context.Context, stdout io.Writer, cfg config.Config, args []string) error {
if len(args) == 0 {
return fmt.Errorf("search query required")
@ -489,6 +588,7 @@ Commands:
export-md Render normalized Markdown from SQLite
databases List crawled Notion databases
export-db --database ID Export a database as CSV or TSV
export-db --all --dir DIR Export every database as CSV or TSV
search QUERY Search page text
sql QUERY Run read-only SQL
publish [--push] Export data and Markdown into a git share repo

View File

@ -1,6 +1,15 @@
package main
import "testing"
import (
"bytes"
"context"
"os"
"path/filepath"
"strings"
"testing"
"github.com/vincentkoc/notcrawl/internal/store"
)
func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
got := searchField("line one\nline\ttwo line three")
@ -8,3 +17,55 @@ func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
t.Fatalf("unexpected field: %q", got)
}
}
// TestExportDatabaseAllWritesFilesAndIndex runs `export-db --all --dir` end to
// end against a temporary store holding two collections and one page, then
// checks the summary line, the per-database files, and the index.tsv manifest.
func TestExportDatabaseAllWritesFilesAndIndex(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
dbPath := filepath.Join(dir, "notcrawl.db")
st, err := store.Open(dbPath)
if err != nil {
t.Fatal(err)
}
now := store.NowMS()
// Seed two collections; the second name contains emoji so the expected file
// name below ("launch-plan-db2.csv") shows them being dropped.
for _, collection := range []store.Collection{
{ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now, SchemaJSON: `{"Name":{"type":"title"}}`},
{ID: "db2", Name: "Launch 🚀 Plan ✅", Source: "test", SyncedAt: now, SchemaJSON: `{"Task":{"type":"title"}}`},
} {
if err := st.UpsertCollection(ctx, collection); err != nil {
t.Fatal(err)
}
}
// Only db1 gets a row, so the run should report 2 databases and 1 row.
if err := st.UpsertPage(ctx, store.Page{
ID: "page1", CollectionID: "db1", Title: "Ship", URL: "https://example.com/ship", Alive: true, Source: "test", SyncedAt: now,
PropertiesJSON: `{"Name":{"type":"title","title":[{"plain_text":"Ship"}]}}`,
}); err != nil {
t.Fatal(err)
}
// Close the seeding handle before the command opens the database itself.
if err := st.Close(); err != nil {
t.Fatal(err)
}
outDir := filepath.Join(dir, "csv")
var stdout, stderr bytes.Buffer
// The --config path points at a file that is never created in this test.
err = run(ctx, []string{"--config", filepath.Join(dir, "missing.toml"), "--db", dbPath, "export-db", "--all", "--dir", outDir}, &stdout, &stderr)
if err != nil {
t.Fatalf("export-db --all failed: %v\nstderr:\n%s", err, stderr.String())
}
if got := stdout.String(); !strings.Contains(got, "exported 2 databases and 1 rows") {
t.Fatalf("unexpected stdout: %s", got)
}
// Every database file and the manifest must exist in the output directory.
for _, name := range []string{"roadmap-db1.csv", "launch-plan-db2.csv", "index.tsv"} {
if _, err := os.Stat(filepath.Join(outDir, name)); err != nil {
t.Fatalf("missing %s: %v", name, err)
}
}
index, err := os.ReadFile(filepath.Join(outDir, "index.tsv"))
if err != nil {
t.Fatal(err)
}
// The manifest must contain the header line and the db1 row (1 row, 4 columns).
for _, want := range []string{"id\tname\tsource\trows\tcolumns\tfile", "db1\tRoadmap\ttest\t1\t4\troadmap-db1.csv"} {
if !strings.Contains(string(index), want) {
t.Fatalf("index missing %q:\n%s", want, index)
}
}
}

View File

@ -79,7 +79,7 @@ func TestExporterUsesDisplayOrder(t *testing.T) {
}
}
func TestExporterPreservesUnicodePathNames(t *testing.T) {
func TestExporterRemovesEmojiFromPathNames(t *testing.T) {
ctx := context.Background()
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {
@ -99,7 +99,7 @@ func TestExporterPreservesUnicodePathNames(t *testing.T) {
if err != nil {
t.Fatal(err)
}
want := filepath.Join(dir, "研究-🚀", "計画-✅-q2-page1.md")
want := filepath.Join(dir, "研究", "計画-q2-page1.md")
if len(s.Files) != 1 || s.Files[0] != want {
t.Fatalf("unexpected export path: %+v, want %s", s.Files, want)
}

View File

@ -17,6 +17,8 @@ import (
const SourceName = "api"
const maxAPIAttempts = 4
type Client struct {
BaseURL string
Version string
@ -448,56 +450,50 @@ func (c Client) ingestComments(ctx context.Context, st *store.Store, pageID, spa
}
func (c Client) do(ctx context.Context, method, path string, body any, out any) error {
var reader io.Reader
var bodyBytes []byte
if body != nil {
b, err := json.Marshal(body)
if err != nil {
return err
}
reader = bytes.NewReader(b)
bodyBytes = b
}
req, err := http.NewRequestWithContext(ctx, method, strings.TrimRight(c.BaseURL, "/")+path, reader)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+c.Token)
req.Header.Set("Notion-Version", c.Version)
req.Header.Set("Accept", "application/json")
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := c.HTTP.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusTooManyRequests {
if wait, err := time.ParseDuration(resp.Header.Get("Retry-After") + "s"); err == nil && wait > 0 {
timer := time.NewTimer(wait)
select {
case <-ctx.Done():
timer.Stop()
return ctx.Err()
case <-timer.C:
}
return c.do(ctx, method, path, body, out)
for attempt := 1; attempt <= maxAPIAttempts; attempt++ {
var reader io.Reader
if bodyBytes != nil {
reader = bytes.NewReader(bodyBytes)
}
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
req, err := http.NewRequestWithContext(ctx, method, strings.TrimRight(c.BaseURL, "/")+path, reader)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+c.Token)
req.Header.Set("Notion-Version", c.Version)
req.Header.Set("Accept", "application/json")
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := c.HTTP.Do(req)
if err != nil {
return err
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
defer resp.Body.Close()
return json.NewDecoder(resp.Body).Decode(out)
}
b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
bodyText := strings.TrimSpace(string(b))
apiErr := notionAPIError{Method: method, Path: path, Status: resp.Status, StatusCode: resp.StatusCode, Body: bodyText}
var payload struct {
Code string `json:"code"`
Message string `json:"message"`
}
if err := json.Unmarshal(b, &payload); err == nil {
apiErr.Code = payload.Code
apiErr.Message = payload.Message
resp.Body.Close()
apiErr := apiErrorFromResponse(method, path, resp, b)
if attempt < maxAPIAttempts && shouldRetry(apiErr) {
if err := waitBeforeRetry(ctx, apiErr.RetryAfter); err != nil {
return err
}
continue
}
return apiErr
}
return json.NewDecoder(resp.Body).Decode(out)
return nil
}
type notionAPIError struct {
@ -508,6 +504,8 @@ type notionAPIError struct {
Code string
Message string
Body string
RetryAfter time.Duration
Retryable bool
}
func (e notionAPIError) Error() string {
@ -517,6 +515,76 @@ func (e notionAPIError) Error() string {
return fmt.Sprintf("notion api %s %s: %s: %s", e.Method, e.Path, e.Status, e.Body)
}
// apiErrorFromResponse builds a notionAPIError from a non-success response and
// the bytes already read from its body.
//
// The retry delay comes from retryAfter (Retry-After header, then the body).
// When the body is valid JSON, its code, message and retryable fields are
// copied over, and its retry_after value is used only if no delay was found.
func apiErrorFromResponse(method, path string, resp *http.Response, body []byte) notionAPIError {
	out := notionAPIError{
		Method:     method,
		Path:       path,
		Status:     resp.Status,
		StatusCode: resp.StatusCode,
		Body:       strings.TrimSpace(string(body)),
		RetryAfter: retryAfter(resp.Header.Get("Retry-After"), body),
	}
	var decoded struct {
		Code       string  `json:"code"`
		Message    string  `json:"message"`
		Retryable  bool    `json:"retryable"`
		RetryAfter float64 `json:"retry_after"`
	}
	if json.Unmarshal(body, &decoded) != nil {
		// Not JSON: keep the raw body text and the status-derived fields only.
		return out
	}
	out.Code = decoded.Code
	out.Message = decoded.Message
	out.Retryable = decoded.Retryable
	if out.RetryAfter == 0 && decoded.RetryAfter > 0 {
		out.RetryAfter = time.Duration(decoded.RetryAfter * float64(time.Second))
	}
	return out
}
// shouldRetry reports whether a failed request is worth repeating: either the
// error is flagged retryable, or the status is 429, 502, 503 or 504.
func shouldRetry(err notionAPIError) bool {
	if err.Retryable {
		return true
	}
	switch err.StatusCode {
	case http.StatusTooManyRequests,
		http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		return true
	default:
		return false
	}
}
// retryAfter works out how long to wait before retrying a request.
//
// It looks at the Retry-After header first, accepting either a number of
// seconds or an HTTP date, and then at a "retry_after" number of seconds in
// the JSON body. It returns 0 when no positive delay can be determined.
func retryAfter(header string, body []byte) time.Duration {
	header = strings.TrimSpace(header)
	if header != "" {
		// Only plain decimal numbers take the seconds path. Without this
		// check the appended "s" would turn a value such as "1m" into "1ms".
		if strings.Trim(header, "0123456789.") == "" {
			if seconds, err := time.ParseDuration(header + "s"); err == nil && seconds > 0 {
				return seconds
			}
		}
		if when, err := http.ParseTime(header); err == nil {
			if wait := time.Until(when); wait > 0 {
				return wait
			}
		}
	}
	var payload struct {
		RetryAfter float64 `json:"retry_after"`
	}
	if err := json.Unmarshal(body, &payload); err == nil && payload.RetryAfter > 0 {
		return time.Duration(payload.RetryAfter * float64(time.Second))
	}
	return 0
}
// waitBeforeRetry pauses for the given duration or until ctx is done,
// whichever happens first. A non-positive wait returns nil immediately
// without consulting ctx; otherwise it returns ctx.Err() if the context ends
// before the timer fires.
func waitBeforeRetry(ctx context.Context, wait time.Duration) error {
	if wait > 0 {
		pause := time.NewTimer(wait)
		defer pause.Stop()
		select {
		case <-pause.C:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}
func isIgnoredCommentError(err error) bool {
apiErr, ok := err.(notionAPIError)
if !ok {

View File

@ -32,7 +32,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
"results":[{
"object":"database",
"id":"db1",
"title":[{"plain_text":"Roadmap"}],
"title":[{"type":"text","plain_text":"Roadmap","text":{"content":"Roadmap"}}],
"parent":{"type":"workspace","workspace":true},
"properties":{
"Name":{"id":"title","type":"title","title":{}},
@ -57,7 +57,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
"url":"https://notion.so/page1",
"parent":{"type":"database_id","database_id":"db1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Name":{"id":"title","type":"title","title":[{"type":"text","plain_text":"Ship","text":{"content":"Ship"}}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
@ -93,7 +93,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "db1" {
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "db1" || rows[0].Title != "Ship" {
t.Fatalf("unexpected rows: %+v", rows)
}
}
@ -122,7 +122,7 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
"results":[{
"object":"data_source",
"id":"ds1",
"title":[{"plain_text":"Roadmap"}],
"title":[{"type":"text","plain_text":"Roadmap","text":{"content":"Roadmap"}}],
"parent":{"type":"database_id","database_id":"db1"},
"database_parent":{"type":"page_id","page_id":"page-parent"},
"properties":{
@ -147,7 +147,7 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
"url":"https://notion.so/page1",
"parent":{"type":"data_source_id","data_source_id":"ds1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Name":{"id":"title","type":"title","title":[{"type":"text","plain_text":"Ship","text":{"content":"Ship"}}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
@ -176,14 +176,14 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if len(collections) != 1 || collections[0].ID != "ds1" || collections[0].ParentID != "db1" {
if len(collections) != 1 || collections[0].ID != "ds1" || collections[0].ParentID != "db1" || collections[0].Name != "Roadmap" {
t.Fatalf("unexpected collections: %+v", collections)
}
rows, err := st.CollectionPages(context.Background(), "ds1")
if err != nil {
t.Fatal(err)
}
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "ds1" {
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "ds1" || rows[0].Title != "Ship" {
t.Fatalf("unexpected rows: %+v", rows)
}
}
@ -213,3 +213,52 @@ func TestIngestCommentsSkipsRestrictedResource(t *testing.T) {
t.Fatalf("unexpected comment count: %d", count)
}
}
// TestIngestCommentsRetriesTransientGatewayError checks that ingestComments
// retries after a 502 response: the fake server fails the first /comments
// request and serves one comment on the second, so exactly two attempts and
// one stored comment are expected.
func TestIngestCommentsRetriesTransientGatewayError(t *testing.T) {
	attempts := 0
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		if r.URL.Path != "/comments" {
			// The handler runs on the server's goroutine, where the
			// FailNow family (Fatal/Fatalf) must not be called. Record the
			// failure with Errorf and answer with an error status instead.
			t.Errorf("unexpected request: %s %s", r.Method, r.URL.String())
			http.NotFound(w, r)
			return
		}
		attempts++
		if attempts == 1 {
			// First call: transient gateway failure with no retry delay.
			w.WriteHeader(http.StatusBadGateway)
			_, _ = w.Write([]byte(`{"retryable":true,"retry_after":0}`))
			return
		}
		_, _ = w.Write([]byte(`{
			"object":"list",
			"results":[{
				"id":"comment1",
				"rich_text":[{"type":"text","plain_text":"Looks good","text":{"content":"Looks good"}}],
				"created_by":{"id":"user1"},
				"created_time":"2026-01-01T00:00:00Z",
				"last_edited_time":"2026-01-01T00:00:00Z"
			}],
			"has_more":false
		}`))
	}))
	defer server.Close()
	st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()
	count, err := (Client{BaseURL: server.URL, Version: "2026-03-11", Token: "secret", HTTP: http.DefaultClient}).ingestComments(context.Background(), st, "page1", "space1")
	if err != nil {
		t.Fatal(err)
	}
	if count != 1 || attempts != 2 {
		t.Fatalf("unexpected count/attempts: count=%d attempts=%d", count, attempts)
	}
	comments, err := st.PageComments(context.Background(), "page1")
	if err != nil {
		t.Fatal(err)
	}
	if len(comments) != 1 || comments[0].Text != "Looks good" {
		t.Fatalf("unexpected comments: %+v", comments)
	}
}

View File

@ -103,7 +103,7 @@ func Slug(s string) string {
}
func isSlugRune(r rune) bool {
return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsMark(r) || (r > unicode.MaxASCII && unicode.IsSymbol(r)) || r == '\u200d'
return unicode.IsLetter(r) || unicode.IsNumber(r)
}
func isSlugSeparator(r rune) bool {
@ -131,19 +131,42 @@ func walk(v any, parts *[]string) {
walk(item, parts)
}
case map[string]any:
for _, key := range []string{"plain_text", "content", "text", "name", "title"} {
if text, ok := normalizedString(x["plain_text"]); ok {
*parts = append(*parts, text)
return
}
if text, ok := richTextContent(x["text"]); ok {
*parts = append(*parts, text)
return
}
if text, ok := normalizedString(x["content"]); ok {
*parts = append(*parts, text)
return
}
for _, key := range []string{"name", "title", "rich_text", "text"} {
if value, ok := x[key]; ok {
walk(value, parts)
}
}
if rt, ok := x["rich_text"]; ok {
walk(rt, parts)
}
if title, ok := x["title"]; ok {
walk(title, parts)
}
if text, ok := x["text"].(map[string]any); ok {
walk(text["content"], parts)
}
}
}
// richTextContent extracts the normalized "content" string from a map value.
// It reports false when v is not a map or when normalizedString rejects the
// content entry.
func richTextContent(v any) (string, bool) {
	if fields, isMap := v.(map[string]any); isMap {
		return normalizedString(fields["content"])
	}
	return "", false
}
// normalizedString returns v passed through Normalize when v is a string and
// the normalized result is non-empty; otherwise it returns "" and false.
func normalizedString(v any) (string, bool) {
	if raw, isString := v.(string); isString {
		if text := Normalize(raw); text != "" {
			return text, true
		}
	}
	return "", false
}

View File

@ -9,6 +9,62 @@ func TestTitleFromProperties(t *testing.T) {
}
}
// TestTitleFromPropertiesPrefersNotionRichTextOnce feeds TitleFromProperties a
// title item that carries the same text in both "plain_text" and
// "text.content" and expects the title to appear once, not duplicated.
func TestTitleFromPropertiesPrefersNotionRichTextOnce(t *testing.T) {
got := TitleFromProperties(`{
"Name": {
"id": "title",
"type": "title",
"title": [{
"type": "text",
"plain_text": "OpenClaw",
"text": {"content": "OpenClaw"}
}]
}
}`)
if got != "OpenClaw" {
t.Fatalf("got %q", got)
}
}
// TestPlainPrefersNotionRichTextPlainTextOnce checks that Plain emits the text
// once for a rich text item that has both "plain_text" and "text.content".
func TestPlainPrefersNotionRichTextPlainTextOnce(t *testing.T) {
got := Plain([]any{map[string]any{
"type": "text",
"plain_text": "OpenClaw",
"text": map[string]any{
"content": "OpenClaw",
},
}})
if got != "OpenClaw" {
t.Fatalf("got %q", got)
}
}
// TestPlainFallsBackToNotionTextContentOnce checks that Plain uses
// "text.content" when a rich text item has no "plain_text" field.
func TestPlainFallsBackToNotionTextContentOnce(t *testing.T) {
got := Plain([]any{map[string]any{
"type": "text",
"text": map[string]any{
"content": "OpenClaw",
},
}})
if got != "OpenClaw" {
t.Fatalf("got %q", got)
}
}
// TestPlainWalksTitleOnlyOnce checks that Plain yields a single copy of the
// text for a map whose "title" list holds one rich text item.
func TestPlainWalksTitleOnlyOnce(t *testing.T) {
got := Plain(map[string]any{
"title": []any{map[string]any{
"plain_text": "Roadmap",
"text": map[string]any{
"content": "Roadmap",
},
}},
})
if got != "Roadmap" {
t.Fatalf("got %q", got)
}
}
func TestSlug(t *testing.T) {
got := Slug("Launch Plan / Q2")
if got != "launch-plan-q2" {
@ -16,9 +72,9 @@ func TestSlug(t *testing.T) {
}
}
func TestSlugPreservesUnicodePathText(t *testing.T) {
func TestSlugRemovesEmojiPathText(t *testing.T) {
got := Slug("研究 🚀 / 計画 ✅")
if got != "研究-🚀-計画-✅" {
if got != "研究-計画" {
t.Fatalf("got %q", got)
}
}
@ -30,6 +86,13 @@ func TestSlugRemovesUnsafePathText(t *testing.T) {
}
}
// TestSlugRemovesEmojiVariationSelectors checks that a leading "⚠️" emoji is
// dropped from the slug entirely, leaving only the lowercased words.
func TestSlugRemovesEmojiVariationSelectors(t *testing.T) {
got := Slug("⚠️ Production Incident Guide")
if got != "production-incident-guide" {
t.Fatalf("got %q", got)
}
}
func TestShortIDKeepsEnoughEntropyForDesktopIDs(t *testing.T) {
got := ShortID("24f71240-0000-0000-0000-123456789abc")
if got != "24f71240-56789abc" {

View File

@ -56,7 +56,10 @@ func (s *Store) Collection(ctx context.Context, id string) (Collection, error) {
func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Page, error) {
rows, err := s.queryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
properties_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from pages where collection_id = ? and alive = 1 order by coalesce(last_edited_time, 0) desc, title`, collectionID)
from pages
where alive = 1
and (collection_id = ? or (parent_id = ? and parent_table in ('collection', 'database', 'data_source')))
order by coalesce(last_edited_time, 0) desc, title`, collectionID, collectionID)
if err != nil {
return nil, err
}
@ -119,6 +122,40 @@ func (s *Store) PageComments(ctx context.Context, pageID string) ([]Comment, err
return comments, rows.Err()
}
// UserNames returns a map from user ID to a display label for every row in
// the users table. The label is the first non-empty value among name, email
// and the ID itself, as chosen by the SQL query.
func (s *Store) UserNames(ctx context.Context) (map[string]string, error) {
	const query = `select id, coalesce(nullif(name, ''), nullif(email, ''), id) from users`
	rows, err := s.queryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	labels := make(map[string]string)
	for rows.Next() {
		var userID, label string
		if scanErr := rows.Scan(&userID, &label); scanErr != nil {
			return nil, scanErr
		}
		labels[userID] = label
	}
	return labels, rows.Err()
}
// PageTitles returns a map from page ID to title for every page with
// alive = 1. Pages whose title is empty map to their own ID.
func (s *Store) PageTitles(ctx context.Context) (map[string]string, error) {
	const query = `select id, coalesce(nullif(title, ''), id) from pages where alive = 1`
	rows, err := s.queryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	titles := make(map[string]string)
	for rows.Next() {
		var pageID, label string
		if scanErr := rows.Scan(&pageID, &label); scanErr != nil {
			return nil, scanErr
		}
		titles[pageID] = label
	}
	return titles, rows.Err()
}
func (s *Store) SpaceNames(ctx context.Context) (map[string]string, error) {
rows, err := s.queryContext(ctx, `select id, name from spaces`)
if err != nil {

View File

@ -31,6 +31,16 @@ type Summary struct {
Columns int
}
// exportColumn describes one output column of an export.
type exportColumn struct {
// Key selects the cell value: the built-in "page_id", "page_title" and "url"
// keys, or otherwise the property key looked up in each page's properties.
Key string
// Header is the text written for this column in the header row.
Header string
}
// referenceLabels holds lookup tables used to show readable labels instead of
// raw IDs when property values reference users or pages.
type referenceLabels struct {
// Users maps a user ID to its label, as returned by Store.UserNames.
Users map[string]string
// Pages maps a page ID to its label, as returned by Store.PageTitles.
Pages map[string]string
}
func (e Exporter) Export(ctx context.Context, databaseID string, format Format, w io.Writer) (Summary, error) {
if e.Store == nil {
return Summary{}, fmt.Errorf("missing store")
@ -46,21 +56,29 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
if err != nil {
return Summary{}, err
}
refs, err := e.referenceLabels(ctx)
if err != nil {
return Summary{}, err
}
columns := columnsFor(collection, pages)
headers := make([]string, 0, len(columns))
for _, col := range columns {
headers = append(headers, col.Header)
}
writer := csv.NewWriter(w)
if format == FormatTSV {
writer.Comma = '\t'
} else if format != "" && format != FormatCSV {
return Summary{}, fmt.Errorf("unsupported format %q", format)
}
if err := writer.Write(columns); err != nil {
if err := writer.Write(headers); err != nil {
return Summary{}, err
}
for _, page := range pages {
props := decodeMap(page.PropertiesJSON)
row := make([]string, 0, len(columns))
for _, col := range columns {
switch col {
switch col.Key {
case "page_id":
row = append(row, page.ID)
case "page_title":
@ -68,7 +86,7 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
case "url":
row = append(row, page.URL)
default:
row = append(row, propertyValueText(props[col]))
row = append(row, propertyValueText(props[col.Key], refs))
}
}
if err := writer.Write(row); err != nil {
@ -82,45 +100,95 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
return Summary{Database: collection.ID, Rows: len(pages), Columns: len(columns)}, nil
}
func columnsFor(collection store.Collection, pages []store.Page) []string {
seen := map[string]bool{"page_id": true, "page_title": true, "url": true}
cols := []string{"page_id", "page_title", "url"}
for _, name := range schemaPropertyNames(collection.SchemaJSON) {
if !seen[name] {
seen[name] = true
cols = append(cols, name)
// referenceLabels loads the user and page label maps from the store. If either
// lookup fails, it returns a zero referenceLabels together with that error.
func (e Exporter) referenceLabels(ctx context.Context) (referenceLabels, error) {
	var (
		labels referenceLabels
		err    error
	)
	if labels.Users, err = e.Store.UserNames(ctx); err != nil {
		return referenceLabels{}, err
	}
	if labels.Pages, err = e.Store.PageTitles(ctx); err != nil {
		return referenceLabels{}, err
	}
	return labels, nil
}
func columnsFor(collection store.Collection, pages []store.Page) []exportColumn {
seenKeys := map[string]bool{"page_id": true, "page_title": true, "url": true}
seenHeaders := map[string]bool{"page_id": true, "page_title": true, "url": true}
cols := []exportColumn{
{Key: "page_id", Header: "page_id"},
{Key: "page_title", Header: "page_title"},
{Key: "url", Header: "url"},
}
for _, prop := range schemaProperties(collection.SchemaJSON) {
if !seenKeys[prop.Key] {
seenKeys[prop.Key] = true
prop.Header = uniqueHeader(prop.Header, prop.Key, seenHeaders)
cols = append(cols, prop)
}
}
var extras []string
var extras []exportColumn
for _, page := range pages {
for name := range decodeMap(page.PropertiesJSON) {
if !seen[name] {
seen[name] = true
extras = append(extras, name)
for key := range decodeMap(page.PropertiesJSON) {
if !seenKeys[key] {
seenKeys[key] = true
extras = append(extras, exportColumn{Key: key, Header: key})
}
}
}
sort.Strings(extras)
sort.Slice(extras, func(i, j int) bool {
return extras[i].Header < extras[j].Header
})
for i := range extras {
extras[i].Header = uniqueHeader(extras[i].Header, extras[i].Key, seenHeaders)
}
return append(cols, extras...)
}
func schemaPropertyNames(raw string) []string {
func schemaProperties(raw string) []exportColumn {
props := decodeMap(raw)
var title []string
var rest []string
for name, value := range props {
var title []exportColumn
var rest []exportColumn
for key, value := range props {
m, ok := value.(map[string]any)
header := key
if ok {
if name, ok := m["name"].(string); ok && strings.TrimSpace(name) != "" {
header = name
}
}
prop := exportColumn{Key: key, Header: header}
if ok && m["type"] == "title" {
title = append(title, name)
title = append(title, prop)
continue
}
rest = append(rest, name)
rest = append(rest, prop)
}
sort.Strings(title)
sort.Strings(rest)
sort.Slice(title, func(i, j int) bool {
return title[i].Header < title[j].Header
})
sort.Slice(rest, func(i, j int) bool {
return rest[i].Header < rest[j].Header
})
return append(title, rest...)
}
// uniqueHeader returns a header that is not yet present in seen and records
// it there. A blank header falls back to key. On a clash the header becomes
// "header (key)", and if that is also taken, "header (key N)" with N counting
// up from 2.
func uniqueHeader(header, key string, seen map[string]bool) string {
	if strings.TrimSpace(header) == "" {
		header = key
	}
	candidate := header
	if seen[candidate] {
		candidate = header + " (" + key + ")"
		n := 2
		for seen[candidate] {
			candidate = fmt.Sprintf("%s (%s %d)", header, key, n)
			n++
		}
	}
	seen[candidate] = true
	return candidate
}
func decodeMap(raw string) map[string]any {
out := map[string]any{}
if strings.TrimSpace(raw) == "" {
@ -130,7 +198,10 @@ func decodeMap(raw string) map[string]any {
return out
}
func propertyValueText(v any) string {
func propertyValueText(v any, refs referenceLabels) string {
if text, ok := desktopValueText(v, refs); ok {
return text
}
m, ok := v.(map[string]any)
if !ok {
return notiontext.Plain(v)
@ -161,11 +232,11 @@ func propertyValueText(v any) string {
case "people", "files":
return joinNamed(m[typ])
case "relation":
return joinIDs(m[typ])
return joinIDs(m[typ], refs)
case "formula":
return formulaText(m["formula"])
return formulaText(m["formula"], refs)
case "rollup":
return rollupText(m["rollup"])
return rollupText(m["rollup"], refs)
case "created_by", "last_edited_by":
return namedObject(m[typ])
case "unique_id":
@ -174,6 +245,111 @@ func propertyValueText(v any) string {
return notiontext.Plain(v)
}
// desktopValueText renders v through desktopPlain and tidies the result:
// " , " sequences become ", " and the text is passed through
// notiontext.Normalize. It reports false when desktopPlain cannot handle v.
func desktopValueText(v any, refs referenceLabels) (string, bool) {
	if raw, ok := desktopPlain(v, refs); ok {
		tidied := strings.ReplaceAll(raw, " , ", ", ")
		return notiontext.Normalize(tidied), true
	}
	return "", false
}
// desktopPlain flattens a nested value made of nil, strings and lists into
// text. The boolean is false when v, or any nested element, is of another
// type.
//
// Rules, in order:
//   - nil and the bare "‣" string produce empty text;
//   - any other string is returned unchanged;
//   - an empty list produces empty text;
//   - a list whose first element is "‣" followed by at least one more element
//     is rendered from its second element by desktopRefListText;
//   - a list whose first element is any other non-empty string yields that
//     string;
//   - otherwise each element is rendered recursively and the non-empty
//     results are joined with single spaces.
func desktopPlain(v any, refs referenceLabels) (string, bool) {
	if v == nil {
		return "", true
	}
	if s, isString := v.(string); isString {
		if s == "‣" {
			return "", true
		}
		return s, true
	}
	list, isList := v.([]any)
	if !isList {
		return "", false
	}
	if len(list) == 0 {
		return "", true
	}
	if marker, isString := list[0].(string); isString && marker != "" {
		if marker == "‣" && len(list) > 1 {
			return desktopRefListText(list[1], refs), true
		}
		// Covers the "," separator as well as plain leading text.
		return marker, true
	}
	pieces := make([]string, 0, len(list))
	for _, element := range list {
		text, ok := desktopPlain(element, refs)
		if !ok {
			return "", false
		}
		if text != "" {
			pieces = append(pieces, text)
		}
	}
	return strings.Join(pieces, " "), true
}
// desktopRefListText renders each entry of a reference list with
// desktopRefText and joins the non-empty results with single spaces. A value
// that is not a list is rendered with notiontext.Plain instead.
func desktopRefListText(v any, refs referenceLabels) string {
	entries, isList := v.([]any)
	if !isList {
		return notiontext.Plain(v)
	}
	var out strings.Builder
	for _, entry := range entries {
		label := desktopRefText(entry, refs)
		if label == "" {
			continue
		}
		if out.Len() > 0 {
			out.WriteByte(' ')
		}
		out.WriteString(label)
	}
	return out.String()
}
// desktopRefText renders a single reference entry of the form [kind, payload].
//
// Recognised kinds:
//   - ",": returned as a literal comma
//   - "u": payload string looked up in refs.Users via labelOrID
//   - "p": payload string looked up in refs.Pages via labelOrID
//   - "d": payload rendered with dateText
//
// Anything else — including a non-list v, an empty list, or a recognised kind
// with a missing or non-string payload — falls back to notiontext.Plain(v).
func desktopRefText(v any, refs referenceLabels) string {
	ref, isList := v.([]any)
	if !isList || len(ref) == 0 {
		return notiontext.Plain(v)
	}
	kind, _ := ref[0].(string)
	switch {
	case kind == ",":
		return ","
	case kind == "d" && len(ref) > 1:
		return dateText(ref[1])
	case kind == "u" || kind == "p":
		id, ok := stringAt(ref, 1)
		if !ok {
			break
		}
		if kind == "u" {
			return labelOrID(refs.Users, id)
		}
		return labelOrID(refs.Pages, id)
	}
	return notiontext.Plain(v)
}
// stringAt returns items[index] as a string.
//
// The boolean is false, and the string empty, when index is outside
// [0, len(items)) or the element at index is not a string.
func stringAt(items []any, index int) (string, bool) {
	// Guard both bounds: a negative index would otherwise panic on the
	// index expression below.
	if index < 0 || index >= len(items) {
		return "", false
	}
	s, ok := items[index].(string)
	return s, ok
}
// labelOrID returns the label stored for id in labels, or id itself when the
// map has no entry for it or the stored label is empty. A nil map is treated
// like an empty one.
func labelOrID(labels map[string]string, id string) string {
	label, found := labels[id]
	if !found || label == "" {
		return id
	}
	return label
}
func namedObject(v any) string {
m, ok := v.(map[string]any)
if !ok {
@ -182,6 +358,9 @@ func namedObject(v any) string {
if name, ok := m["name"].(string); ok {
return name
}
if value, ok := m["value"].(string); ok {
return value
}
if id, ok := m["id"].(string); ok {
return id
}
@ -202,7 +381,7 @@ func joinNamed(v any) string {
return strings.Join(parts, ", ")
}
func joinIDs(v any) string {
func joinIDs(v any, refs referenceLabels) string {
items, ok := v.([]any)
if !ok {
return ""
@ -214,7 +393,7 @@ func joinIDs(v any) string {
continue
}
if id, ok := m["id"].(string); ok {
parts = append(parts, id)
parts = append(parts, labelOrID(refs.Pages, id))
}
}
return strings.Join(parts, ", ")
@ -226,14 +405,20 @@ func dateText(v any) string {
return ""
}
start, _ := m["start"].(string)
if start == "" {
start, _ = m["start_date"].(string)
}
end, _ := m["end"].(string)
if end == "" {
end, _ = m["end_date"].(string)
}
if end != "" {
return start + "/" + end
}
return start
}
func formulaText(v any) string {
func formulaText(v any, refs referenceLabels) string {
m, ok := v.(map[string]any)
if !ok {
return ""
@ -252,10 +437,13 @@ func formulaText(v any) string {
case "date":
return dateText(m["date"])
}
if text, ok := desktopValueText(v, refs); ok {
return text
}
return notiontext.Plain(v)
}
func rollupText(v any) string {
func rollupText(v any, refs referenceLabels) string {
m, ok := v.(map[string]any)
if !ok {
return ""
@ -270,12 +458,15 @@ func rollupText(v any) string {
items, _ := m["array"].([]any)
parts := make([]string, 0, len(items))
for _, item := range items {
if text := propertyValueText(item); text != "" {
if text := propertyValueText(item, refs); text != "" {
parts = append(parts, text)
}
}
return strings.Join(parts, ", ")
}
if text, ok := desktopValueText(v, refs); ok {
return text
}
return notiontext.Plain(v)
}

View File

@ -20,13 +20,22 @@ func TestExportDatabaseTSV(t *testing.T) {
now := store.NowMS()
if err := st.UpsertCollection(ctx, store.Collection{
ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now,
SchemaJSON: `{"Name":{"type":"title"},"Status":{"type":"select"},"Score":{"type":"number"}}`,
SchemaJSON: `{"title":{"name":"Name","type":"title"},"assignee_id":{"name":"Assignee","type":"person"},"due_id":{"name":"Due","type":"date"},"status_id":{"name":"Status","type":"select"},"score_id":{"name":"Score","type":"number"}}`,
}); err != nil {
t.Fatal(err)
}
if err := st.UpsertUser(ctx, store.User{ID: "user1", Name: "Claire Pena", Source: "test", SyncedAt: now}); err != nil {
t.Fatal(err)
}
if err := st.UpsertPage(ctx, store.Page{
ID: "page1", CollectionID: "db1", Title: "Ship", URL: "https://example.com/ship", Alive: true, Source: "test", SyncedAt: now,
PropertiesJSON: `{"Name":{"type":"title","title":[{"plain_text":"Ship"}]},"Status":{"type":"select","select":{"name":"Done"}},"Score":{"type":"number","number":7}}`,
PropertiesJSON: `{"title":{"type":"title","title":[{"plain_text":"Ship"}]},"status_id":{"type":"select","select":{"name":"Done"}},"score_id":{"type":"number","number":7}}`,
}); err != nil {
t.Fatal(err)
}
if err := st.UpsertPage(ctx, store.Page{
ID: "page2", ParentID: "db1", ParentTable: "collection", Title: "Draft", URL: "https://example.com/draft", Alive: true, Source: "test", SyncedAt: now,
PropertiesJSON: `{"title":[["Draft"]],"assignee_id":[["‣",[["u","user1"]]]],"due_id":[["‣",[["d",{"type":"date","start_date":"2025-05-23"}]]]],"status_id":[["In progress"]],"score_id":[["3"]]}`,
}); err != nil {
t.Fatal(err)
}
@ -35,11 +44,15 @@ func TestExportDatabaseTSV(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if s.Rows != 1 {
t.Fatalf("expected one row, got %d", s.Rows)
if s.Rows != 2 {
t.Fatalf("expected two rows, got %d", s.Rows)
}
got := out.String()
for _, want := range []string{"page_id\tpage_title\turl\tName\tScore\tStatus", "page1\tShip\thttps://example.com/ship\tShip\t7\tDone"} {
for _, want := range []string{
"page_id\tpage_title\turl\tName\tAssignee\tDue\tScore\tStatus",
"page1\tShip\thttps://example.com/ship\tShip\t\t\t7\tDone",
"page2\tDraft\thttps://example.com/draft\tDraft\tClaire Pena\t2025-05-23\t3\tIn progress",
} {
if !strings.Contains(got, want) {
t.Fatalf("missing %q in:\n%s", want, got)
}