feat: export normalized markdown

This commit is contained in:
Vincent Koc 2026-04-22 15:54:56 -07:00
parent e814a23509
commit 6fa3286edc
No known key found for this signature in database
2 changed files with 262 additions and 0 deletions

216
internal/markdown/export.go Normal file
View File

@@ -0,0 +1,216 @@
package markdown
import (
"context"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/vincentkoc/notioncrawl/internal/notiontext"
"github.com/vincentkoc/notioncrawl/internal/store"
)
// Exporter writes the pages held in a store out as Markdown files.
type Exporter struct {
// Store supplies pages, blocks, comments and space names. Export rejects a nil Store.
Store *store.Store
// Dir is the root output directory. Export rejects an empty Dir and creates the directory if needed.
Dir string
}
// Summary reports the outcome of an Export run.
type Summary struct {
// Pages is the number of pages written; Export increments it once per successful page.
Pages int
// Files holds the path of each written file, in the order the pages were processed.
Files []string
}
// Export renders every page returned by the store into a Markdown file under
// e.Dir and reports how many pages were written and where.
//
// It returns an error when Store is nil or Dir is empty, creates Dir when it
// does not exist, and stops at the first page that fails. In that failure case
// the returned Summary still describes the pages written before the error.
func (e Exporter) Export(ctx context.Context) (Summary, error) {
	switch {
	case e.Store == nil:
		return Summary{}, fmt.Errorf("missing store")
	case e.Dir == "":
		return Summary{}, fmt.Errorf("missing markdown dir")
	}

	if mkErr := os.MkdirAll(e.Dir, 0o755); mkErr != nil {
		return Summary{}, mkErr
	}

	pages, err := e.Store.Pages(ctx)
	if err != nil {
		return Summary{}, err
	}

	var summary Summary
	for _, pg := range pages {
		written, writeErr := e.writePage(ctx, pg)
		if writeErr != nil {
			// Hand back the partial summary so callers can see what was exported.
			return summary, writeErr
		}
		summary.Files = append(summary.Files, written)
		summary.Pages++
	}
	return summary, nil
}
// writePage renders a single page to <Dir>/<space-slug>/<title-slug>-<short-id>.md
// and returns the path of the file it wrote.
//
// The file consists of YAML front matter, an optional "# title" heading, the
// rendered block tree, and a "## Comments" list. The comments section is only
// emitted when at least one comment has non-empty text after escaping, so the
// output never ends in a heading with no items under it.
//
// On any failure the returned path is empty and the error is wrapped with the
// page ID so callers can tell which page could not be exported.
func (e Exporter) writePage(ctx context.Context, page store.Page) (string, error) {
	spaceName, err := e.Store.SpaceName(ctx, page.SpaceID)
	if err != nil {
		return "", fmt.Errorf("loading space name for page %q: %w", page.ID, err)
	}
	blocks, err := e.Store.PageBlocks(ctx, page.ID)
	if err != nil {
		return "", fmt.Errorf("loading blocks for page %q: %w", page.ID, err)
	}
	comments, err := e.Store.PageComments(ctx, page.ID)
	if err != nil {
		return "", fmt.Errorf("loading comments for page %q: %w", page.ID, err)
	}

	spaceSlug := notiontext.Slug(spaceName)
	titleSlug := notiontext.Slug(page.Title)
	name := fmt.Sprintf("%s-%s.md", titleSlug, notiontext.ShortID(page.ID))
	path := filepath.Join(e.Dir, spaceSlug, name)
	if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
		return "", fmt.Errorf("creating directory for page %q: %w", page.ID, err)
	}

	var b strings.Builder
	writeFrontMatter(&b, page, spaceName)
	if page.Title != "" {
		fmt.Fprintf(&b, "# %s\n\n", notiontext.MarkdownEscape(page.Title))
	}
	renderBlocks(&b, page.ID, blocks)

	// Escape and filter first so the heading is skipped when nothing remains.
	var items []string
	for _, c := range comments {
		if text := notiontext.MarkdownEscape(c.Text); text != "" {
			items = append(items, text)
		}
	}
	if len(items) > 0 {
		// Keep a blank line between the body and the comments heading.
		if !strings.HasSuffix(b.String(), "\n\n") {
			b.WriteString("\n")
		}
		b.WriteString("## Comments\n\n")
		for _, item := range items {
			fmt.Fprintf(&b, "- %s\n", item)
		}
	}

	// Normalize the tail to exactly one trailing newline.
	out := strings.TrimRight(b.String(), " \n") + "\n"
	if err := os.WriteFile(path, []byte(out), 0o644); err != nil {
		return "", fmt.Errorf("writing page %q: %w", page.ID, err)
	}
	return path, nil
}
// writeFrontMatter emits the front matter block for page: a "---" line, one
// key/value line per populated field (writeKV skips empty values), and a
// closing "---" followed by a blank line. Timestamps are rendered via formatMS.
func writeFrontMatter(b *strings.Builder, page store.Page, spaceName string) {
	// Field order here is the order the keys appear in the output.
	fields := [...]struct{ key, value string }{
		{"id", page.ID},
		{"space_id", page.SpaceID},
		{"space", spaceName},
		{"title", page.Title},
		{"source", page.Source},
		{"notion_url", page.URL},
		{"created_time", formatMS(page.CreatedTime)},
		{"last_edited_time", formatMS(page.LastEditedTime)},
	}

	b.WriteString("---\n")
	for _, f := range fields {
		writeKV(b, f.key, f.value)
	}
	b.WriteString("---\n\n")
}
// frontMatterEscaper rewrites a value so it is safe inside a single-line,
// double-quoted front matter scalar. Backslashes are doubled because the
// double-quoted style treats them as the escape character; leaving them raw
// turns sequences such as `\p` into invalid escapes. Line breaks of any flavor
// collapse to one space so the value stays on one line.
var frontMatterEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\r\n", " ",
	"\n", " ",
	"\r", " ",
)

// writeKV appends `key: "value"` and a newline to b. Empty values are skipped
// entirely so unset fields do not appear in the front matter. The value is
// escaped with frontMatterEscaper before being quoted.
func writeKV(b *strings.Builder, key, value string) {
	if value == "" {
		return
	}
	fmt.Fprintf(b, "%s: \"%s\"\n", key, frontMatterEscaper.Replace(value))
}
// renderBlocks writes the Markdown for all blocks of one page into b.
//
// Blocks are grouped by ParentID (a block whose ID equals pageID is ignored),
// each sibling group is ordered by CreatedTime with ID as the tie-breaker, and
// the tree is rendered depth-first starting from pageID. When the page has no
// direct children at all, every remaining block is rendered flat at depth 0 in
// the order it appears in blocks.
func renderBlocks(b *strings.Builder, pageID string, blocks []store.Block) {
	byParent := make(map[string][]store.Block)
	for _, blk := range blocks {
		if blk.ID != pageID {
			byParent[blk.ParentID] = append(byParent[blk.ParentID], blk)
		}
	}

	// Sorting the ranged slice value reorders the shared backing array in place.
	for _, siblings := range byParent {
		sort.SliceStable(siblings, func(i, j int) bool {
			if siblings[i].CreatedTime != siblings[j].CreatedTime {
				return siblings[i].CreatedTime < siblings[j].CreatedTime
			}
			return siblings[i].ID < siblings[j].ID
		})
	}

	renderChildren(b, pageID, byParent, 0)
	if len(byParent[pageID]) > 0 {
		return
	}

	// Nothing hangs directly off the page: fall back to a flat listing.
	for _, blk := range blocks {
		if blk.ID == pageID || blk.ParentID == pageID {
			continue
		}
		renderBlock(b, blk, 0)
	}
}
// renderChildren renders the blocks registered under parentID in children,
// each followed immediately by its own descendants one depth level deeper.
func renderChildren(b *strings.Builder, parentID string, children map[string][]store.Block, depth int) {
	kids := children[parentID]
	for i := range kids {
		renderBlock(b, kids[i], depth)
		renderChildren(b, kids[i].ID, children, depth+1)
	}
}
// renderBlock appends the Markdown form of one block to b.
//
// The block text is passed through notiontext.MarkdownEscape first. List-like
// blocks (bulleted, numbered, to-do) are prefixed with depth copies of the
// indent unit; headings, quotes and all other kinds ignore depth. List, to-do
// and quote blocks with blank text show the block type instead, media blocks
// with blank text show the block ID, and unknown types with empty text render
// as "[type]".
func renderBlock(b *strings.Builder, block store.Block, depth int) {
	text := notiontext.MarkdownEscape(block.Text)
	indent := strings.Repeat(" ", depth)
	kind := block.Type

	var line string
	switch kind {
	case "header", "heading_1":
		line = "# " + text
	case "sub_header", "heading_2":
		line = "## " + text
	case "sub_sub_header", "heading_3":
		line = "### " + text
	case "bulleted_list", "bulleted_list_item":
		line = indent + "- " + fallback(text, kind)
	case "numbered_list", "numbered_list_item":
		line = indent + "1. " + fallback(text, kind)
	case "to_do", "to_do_item":
		line = indent + "- [ ] " + fallback(text, kind)
	case "quote":
		line = "> " + fallback(text, kind)
	case "code":
		// Fenced blocks bypass writeLine so the text is emitted untrimmed.
		b.WriteString("```text\n" + text + "\n```\n\n")
		return
	case "divider":
		line = "---"
	case "image", "file", "pdf", "video", "figma", "drive":
		line = fmt.Sprintf("[%s: %s]", kind, fallback(text, block.ID))
	case "column", "column_list", "table", "table_row", "collection_view":
		// Container kinds contribute only their own text, if any.
		line = text
	default:
		line = text
		if text == "" && kind != "" {
			line = fmt.Sprintf("[%s]", kind)
		}
	}
	// writeLine drops lines that are empty after trimming, so no guard is needed.
	writeLine(b, line)
}
// writeLine appends line to b followed by a blank line, after stripping
// trailing spaces. Lines that are empty once stripped are dropped.
func writeLine(b *strings.Builder, line string) {
	if trimmed := strings.TrimRight(line, " "); trimmed != "" {
		b.WriteString(trimmed)
		b.WriteString("\n\n")
	}
}
// fallback returns s unchanged unless it is empty or whitespace-only, in which
// case it returns the supplied fallback value.
func fallback(s, fallback string) string {
	if strings.TrimSpace(s) == "" {
		return fallback
	}
	return s
}
// formatMS converts a Unix timestamp in milliseconds to an RFC 3339 string in
// UTC. Zero and negative inputs are treated as unset and produce "".
func formatMS(ms int64) string {
	if ms > 0 {
		stamp := time.UnixMilli(ms).UTC()
		return stamp.Format(time.RFC3339)
	}
	return ""
}

View File

@@ -0,0 +1,46 @@
package markdown
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"github.com/vincentkoc/notioncrawl/internal/store"
)
// TestExporterWritesMarkdown seeds a temporary store with one space, one page
// and one bulleted block, runs the exporter into a temporary directory, and
// checks that exactly one file is reported and that it contains the page
// heading and the list item.
func TestExporterWritesMarkdown(t *testing.T) {
	ctx := context.Background()

	dbPath := filepath.Join(t.TempDir(), "notioncrawl.db")
	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	now := store.NowMS()
	space := store.Space{ID: "space1", Name: "Engineering", Source: "test", SyncedAt: now}
	if err := st.UpsertSpace(ctx, space); err != nil {
		t.Fatal(err)
	}
	page := store.Page{ID: "page1", SpaceID: "space1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}
	block := store.Block{ID: "block1", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "ship it", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertBlock(ctx, block); err != nil {
		t.Fatal(err)
	}

	outDir := t.TempDir()
	summary, err := Exporter{Store: st, Dir: outDir}.Export(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if summary.Pages != 1 || len(summary.Files) != 1 {
		t.Fatalf("unexpected summary: %+v", summary)
	}

	raw, err := os.ReadFile(summary.Files[0])
	if err != nil {
		t.Fatal(err)
	}
	got := string(raw)
	// Both fragments must be present; either one missing fails the same way.
	for _, want := range []string{"# Launch Plan", "- ship it"} {
		if !strings.Contains(got, want) {
			t.Fatalf("unexpected markdown:\n%s", got)
		}
	}
}