feat: export normalized markdown
This commit is contained in:
parent
e814a23509
commit
6fa3286edc
216
internal/markdown/export.go
Normal file
216
internal/markdown/export.go
Normal file
@ -0,0 +1,216 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/vincentkoc/notioncrawl/internal/notiontext"
|
||||
"github.com/vincentkoc/notioncrawl/internal/store"
|
||||
)
|
||||
|
||||
type Exporter struct {
|
||||
Store *store.Store
|
||||
Dir string
|
||||
}
|
||||
|
||||
// Summary reports what a single Export call wrote.
type Summary struct {
	// Pages counts the pages that were written successfully.
	Pages int
	// Files holds the path of every written file, in the order the pages
	// were processed.
	Files []string
}
|
||||
|
||||
func (e Exporter) Export(ctx context.Context) (Summary, error) {
|
||||
if e.Store == nil {
|
||||
return Summary{}, fmt.Errorf("missing store")
|
||||
}
|
||||
if e.Dir == "" {
|
||||
return Summary{}, fmt.Errorf("missing markdown dir")
|
||||
}
|
||||
if err := os.MkdirAll(e.Dir, 0o755); err != nil {
|
||||
return Summary{}, err
|
||||
}
|
||||
pages, err := e.Store.Pages(ctx)
|
||||
if err != nil {
|
||||
return Summary{}, err
|
||||
}
|
||||
var s Summary
|
||||
for _, page := range pages {
|
||||
path, err := e.writePage(ctx, page)
|
||||
if err != nil {
|
||||
return s, err
|
||||
}
|
||||
s.Pages++
|
||||
s.Files = append(s.Files, path)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (e Exporter) writePage(ctx context.Context, page store.Page) (string, error) {
|
||||
spaceName, err := e.Store.SpaceName(ctx, page.SpaceID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
blocks, err := e.Store.PageBlocks(ctx, page.ID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
comments, err := e.Store.PageComments(ctx, page.ID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
spaceSlug := notiontext.Slug(spaceName)
|
||||
titleSlug := notiontext.Slug(page.Title)
|
||||
name := fmt.Sprintf("%s-%s.md", titleSlug, notiontext.ShortID(page.ID))
|
||||
path := filepath.Join(e.Dir, spaceSlug, name)
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var b strings.Builder
|
||||
writeFrontMatter(&b, page, spaceName)
|
||||
if page.Title != "" {
|
||||
fmt.Fprintf(&b, "# %s\n\n", notiontext.MarkdownEscape(page.Title))
|
||||
}
|
||||
renderBlocks(&b, page.ID, blocks)
|
||||
if len(comments) > 0 {
|
||||
if !strings.HasSuffix(b.String(), "\n\n") {
|
||||
b.WriteString("\n")
|
||||
}
|
||||
b.WriteString("## Comments\n\n")
|
||||
for _, c := range comments {
|
||||
text := notiontext.MarkdownEscape(c.Text)
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(&b, "- %s\n", text)
|
||||
}
|
||||
}
|
||||
out := strings.TrimRight(b.String(), " \n") + "\n"
|
||||
return path, os.WriteFile(path, []byte(out), 0o644)
|
||||
}
|
||||
|
||||
func writeFrontMatter(b *strings.Builder, page store.Page, spaceName string) {
|
||||
b.WriteString("---\n")
|
||||
writeKV(b, "id", page.ID)
|
||||
writeKV(b, "space_id", page.SpaceID)
|
||||
writeKV(b, "space", spaceName)
|
||||
writeKV(b, "title", page.Title)
|
||||
writeKV(b, "source", page.Source)
|
||||
writeKV(b, "notion_url", page.URL)
|
||||
writeKV(b, "created_time", formatMS(page.CreatedTime))
|
||||
writeKV(b, "last_edited_time", formatMS(page.LastEditedTime))
|
||||
b.WriteString("---\n\n")
|
||||
}
|
||||
|
||||
// frontMatterEscaper prepares a value for a double-quoted YAML scalar.
// Backslashes and double quotes are escaped (an unescaped backslash would
// start an escape sequence and can swallow the closing quote), and line
// breaks are flattened to a single space so the value stays on one line.
var frontMatterEscaper = strings.NewReplacer(
	`\`, `\\`,
	`"`, `\"`,
	"\r\n", " ",
	"\n", " ",
	"\r", " ",
)

// writeKV appends `key: "value"` followed by a newline to b, with value
// escaped for a double-quoted scalar. Nothing is written when value is empty.
func writeKV(b *strings.Builder, key, value string) {
	if value == "" {
		return
	}
	fmt.Fprintf(b, "%s: \"%s\"\n", key, frontMatterEscaper.Replace(value))
}
|
||||
|
||||
func renderBlocks(b *strings.Builder, pageID string, blocks []store.Block) {
|
||||
children := map[string][]store.Block{}
|
||||
for _, block := range blocks {
|
||||
if block.ID == pageID {
|
||||
continue
|
||||
}
|
||||
parent := block.ParentID
|
||||
children[parent] = append(children[parent], block)
|
||||
}
|
||||
for parent := range children {
|
||||
sort.SliceStable(children[parent], func(i, j int) bool {
|
||||
a, z := children[parent][i], children[parent][j]
|
||||
if a.CreatedTime == z.CreatedTime {
|
||||
return a.ID < z.ID
|
||||
}
|
||||
return a.CreatedTime < z.CreatedTime
|
||||
})
|
||||
}
|
||||
renderChildren(b, pageID, children, 0)
|
||||
if len(children[pageID]) == 0 {
|
||||
var loose []store.Block
|
||||
for _, block := range blocks {
|
||||
if block.ID != pageID && block.ParentID != pageID {
|
||||
loose = append(loose, block)
|
||||
}
|
||||
}
|
||||
for _, block := range loose {
|
||||
renderBlock(b, block, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func renderChildren(b *strings.Builder, parentID string, children map[string][]store.Block, depth int) {
|
||||
for _, block := range children[parentID] {
|
||||
renderBlock(b, block, depth)
|
||||
renderChildren(b, block.ID, children, depth+1)
|
||||
}
|
||||
}
|
||||
|
||||
func renderBlock(b *strings.Builder, block store.Block, depth int) {
|
||||
text := notiontext.MarkdownEscape(block.Text)
|
||||
indent := strings.Repeat(" ", depth)
|
||||
switch block.Type {
|
||||
case "header", "heading_1":
|
||||
writeLine(b, "# "+text)
|
||||
case "sub_header", "heading_2":
|
||||
writeLine(b, "## "+text)
|
||||
case "sub_sub_header", "heading_3":
|
||||
writeLine(b, "### "+text)
|
||||
case "bulleted_list", "bulleted_list_item":
|
||||
writeLine(b, indent+"- "+fallback(text, block.Type))
|
||||
case "numbered_list", "numbered_list_item":
|
||||
writeLine(b, indent+"1. "+fallback(text, block.Type))
|
||||
case "to_do", "to_do_item":
|
||||
writeLine(b, indent+"- [ ] "+fallback(text, block.Type))
|
||||
case "quote":
|
||||
writeLine(b, "> "+fallback(text, block.Type))
|
||||
case "code":
|
||||
b.WriteString("```text\n")
|
||||
b.WriteString(text)
|
||||
b.WriteString("\n```\n\n")
|
||||
case "divider":
|
||||
writeLine(b, "---")
|
||||
case "image", "file", "pdf", "video", "figma", "drive":
|
||||
writeLine(b, fmt.Sprintf("[%s: %s]", block.Type, fallback(text, block.ID)))
|
||||
case "column", "column_list", "table", "table_row", "collection_view":
|
||||
if text != "" {
|
||||
writeLine(b, text)
|
||||
}
|
||||
default:
|
||||
if text != "" {
|
||||
writeLine(b, text)
|
||||
} else if block.Type != "" {
|
||||
writeLine(b, fmt.Sprintf("[%s]", block.Type))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// writeLine appends line to b followed by a blank line, after stripping
// trailing spaces. A line that is empty once stripped is dropped entirely.
// Only the space character is stripped; tabs and newlines are kept.
func writeLine(b *strings.Builder, line string) {
	if trimmed := strings.TrimRight(line, " "); trimmed != "" {
		b.WriteString(trimmed)
		b.WriteString("\n\n")
	}
}
|
||||
|
||||
// fallback returns s unchanged unless it is empty or whitespace-only, in
// which case the fallback value is returned instead.
func fallback(s, fallback string) string {
	if strings.TrimSpace(s) == "" {
		return fallback
	}
	return s
}
|
||||
|
||||
// formatMS renders a Unix timestamp given in milliseconds as an RFC 3339
// string in UTC. Zero and negative values yield "".
func formatMS(ms int64) string {
	if ms > 0 {
		return time.UnixMilli(ms).UTC().Format(time.RFC3339)
	}
	return ""
}
|
||||
46
internal/markdown/export_test.go
Normal file
46
internal/markdown/export_test.go
Normal file
@ -0,0 +1,46 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/vincentkoc/notioncrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestExporterWritesMarkdown(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
st, err := store.Open(filepath.Join(t.TempDir(), "notioncrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer st.Close()
|
||||
now := store.NowMS()
|
||||
if err := st.UpsertSpace(ctx, store.Space{ID: "space1", Name: "Engineering", Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := st.UpsertPage(ctx, store.Page{ID: "page1", SpaceID: "space1", Title: "Launch Plan", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := st.UpsertBlock(ctx, store.Block{ID: "block1", PageID: "page1", ParentID: "page1", Type: "bulleted_list", Text: "ship it", Alive: true, Source: "test", SyncedAt: now}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
dir := t.TempDir()
|
||||
s, err := Exporter{Store: st, Dir: dir}.Export(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if s.Pages != 1 || len(s.Files) != 1 {
|
||||
t.Fatalf("unexpected summary: %+v", s)
|
||||
}
|
||||
b, err := os.ReadFile(s.Files[0])
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
text := string(b)
|
||||
if !strings.Contains(text, "# Launch Plan") || !strings.Contains(text, "- ship it") {
|
||||
t.Fatalf("unexpected markdown:\n%s", text)
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user