357 lines
8.7 KiB
Go
357 lines
8.7 KiB
Go
package markdown
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/vincentkoc/notcrawl/internal/notiontext"
|
|
"github.com/vincentkoc/notcrawl/internal/store"
|
|
)
|
|
|
|
type Exporter struct {
|
|
Store *store.Store
|
|
Dir string
|
|
}
|
|
|
|
// Summary reports what a single Export call wrote.
type Summary struct {
	// Pages is the number of pages written.
	Pages int
	// Files holds the path of every written file, in write order.
	Files []string
}
|
|
|
|
func (e Exporter) Export(ctx context.Context) (Summary, error) {
|
|
if e.Store == nil {
|
|
return Summary{}, fmt.Errorf("missing store")
|
|
}
|
|
if e.Dir == "" {
|
|
return Summary{}, fmt.Errorf("missing markdown dir")
|
|
}
|
|
if err := os.MkdirAll(e.Dir, 0o755); err != nil {
|
|
return Summary{}, err
|
|
}
|
|
pages, err := e.Store.Pages(ctx)
|
|
if err != nil {
|
|
return Summary{}, err
|
|
}
|
|
paths, err := newPathResolver(ctx, e.Store)
|
|
if err != nil {
|
|
return Summary{}, err
|
|
}
|
|
var s Summary
|
|
keep := map[string]bool{}
|
|
for _, page := range pages {
|
|
path, err := e.writePage(ctx, paths, page)
|
|
if err != nil {
|
|
return s, err
|
|
}
|
|
keep[filepath.Clean(path)] = true
|
|
s.Pages++
|
|
s.Files = append(s.Files, path)
|
|
}
|
|
if err := pruneStaleMarkdown(e.Dir, keep); err != nil {
|
|
return s, err
|
|
}
|
|
return s, nil
|
|
}
|
|
|
|
func (e Exporter) writePage(ctx context.Context, paths pathResolver, page store.Page) (string, error) {
|
|
spaceName := paths.spaceName(page.SpaceID)
|
|
teamID := paths.pageTeamID(page)
|
|
teamName := paths.teamName(teamID)
|
|
blocks, err := e.Store.PageBlocks(ctx, page.ID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
comments, err := e.Store.PageComments(ctx, page.ID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
spaceSlug := notiontext.Slug(spaceName)
|
|
titleSlug := maxSlug(notiontext.Slug(page.Title), 96)
|
|
name := fmt.Sprintf("%s-%s.md", titleSlug, notiontext.ShortID(page.ID))
|
|
parts := []string{e.Dir, spaceSlug}
|
|
if teamName != "" {
|
|
parts = append(parts, notiontext.Slug(teamName))
|
|
}
|
|
parts = append(parts, name)
|
|
path := filepath.Join(parts...)
|
|
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
|
return "", err
|
|
}
|
|
var b strings.Builder
|
|
writeFrontMatter(&b, page, spaceName, teamID, teamName)
|
|
if page.Title != "" {
|
|
fmt.Fprintf(&b, "# %s\n\n", notiontext.MarkdownEscape(page.Title))
|
|
}
|
|
renderBlocks(&b, page.ID, blocks)
|
|
if len(comments) > 0 {
|
|
if !strings.HasSuffix(b.String(), "\n\n") {
|
|
b.WriteString("\n")
|
|
}
|
|
b.WriteString("## Comments\n\n")
|
|
for _, c := range comments {
|
|
text := notiontext.MarkdownEscape(c.Text)
|
|
if text == "" {
|
|
continue
|
|
}
|
|
fmt.Fprintf(&b, "- %s\n", text)
|
|
}
|
|
}
|
|
out := strings.TrimRight(b.String(), " \n") + "\n"
|
|
return path, os.WriteFile(path, []byte(out), 0o644)
|
|
}
|
|
|
|
type pathResolver struct {
|
|
spaces map[string]string
|
|
teams map[string]string
|
|
blocks map[string]store.ParentRef
|
|
collections map[string]store.ParentRef
|
|
}
|
|
|
|
func newPathResolver(ctx context.Context, st *store.Store) (pathResolver, error) {
|
|
spaces, err := st.SpaceNames(ctx)
|
|
if err != nil {
|
|
return pathResolver{}, err
|
|
}
|
|
teams, err := st.TeamNames(ctx)
|
|
if err != nil {
|
|
return pathResolver{}, err
|
|
}
|
|
blocks, err := st.BlockParents(ctx)
|
|
if err != nil {
|
|
return pathResolver{}, err
|
|
}
|
|
collections, err := st.CollectionParents(ctx)
|
|
if err != nil {
|
|
return pathResolver{}, err
|
|
}
|
|
return pathResolver{spaces: spaces, teams: teams, blocks: blocks, collections: collections}, nil
|
|
}
|
|
|
|
func (r pathResolver) spaceName(id string) string {
|
|
if id == "" {
|
|
return "default"
|
|
}
|
|
if name := r.spaces[id]; name != "" {
|
|
return name
|
|
}
|
|
return "space-" + notiontext.ShortID(id)
|
|
}
|
|
|
|
func (r pathResolver) teamName(id string) string {
|
|
if id == "" {
|
|
return ""
|
|
}
|
|
if name := r.teams[id]; name != "" {
|
|
return name
|
|
}
|
|
return "team-" + notiontext.ShortID(id)
|
|
}
|
|
|
|
func (r pathResolver) pageTeamID(page store.Page) string {
|
|
return r.resolveTeamID(page.ParentTable, page.ParentID, page.CollectionID, map[string]bool{page.ID: true})
|
|
}
|
|
|
|
func (r pathResolver) resolveTeamID(table, id, collectionID string, seen map[string]bool) string {
|
|
if table == "team" {
|
|
return id
|
|
}
|
|
if table == "collection" && id == "" {
|
|
id = collectionID
|
|
}
|
|
if id == "" || seen[table+":"+id] {
|
|
return ""
|
|
}
|
|
seen[table+":"+id] = true
|
|
switch table {
|
|
case "block":
|
|
parent := r.blocks[id]
|
|
return r.resolveTeamID(parent.Table, parent.ID, "", seen)
|
|
case "collection", "database", "data_source":
|
|
parent := r.collections[id]
|
|
return r.resolveTeamID(parent.Table, parent.ID, "", seen)
|
|
default:
|
|
return ""
|
|
}
|
|
}
|
|
|
|
func writeFrontMatter(b *strings.Builder, page store.Page, spaceName, teamID, teamName string) {
|
|
b.WriteString("---\n")
|
|
writeKV(b, "id", page.ID)
|
|
writeKV(b, "space_id", page.SpaceID)
|
|
writeKV(b, "space", spaceName)
|
|
writeKV(b, "team_id", teamID)
|
|
writeKV(b, "team", teamName)
|
|
writeKV(b, "title", page.Title)
|
|
writeKV(b, "source", page.Source)
|
|
writeKV(b, "notion_url", page.URL)
|
|
writeKV(b, "created_time", formatMS(page.CreatedTime))
|
|
writeKV(b, "last_edited_time", formatMS(page.LastEditedTime))
|
|
b.WriteString("---\n\n")
|
|
}
|
|
|
|
// writeKV appends a single `key: "value"` front-matter line to b. Nothing is
// written when value is empty.
//
// The value is emitted as a double-quoted scalar on one line: line breaks
// (CRLF, LF, CR) become a single space, and backslashes and double quotes are
// backslash-escaped so that a value such as `C:\dir\` or `say "hi"` cannot
// terminate or corrupt the quoted string.
func writeKV(b *strings.Builder, key, value string) {
	if value == "" {
		return
	}
	// A single-pass replacer guarantees the backslashes introduced while
	// escaping quotes are not escaped a second time. "\r\n" is listed before
	// "\r" and "\n" so a CRLF pair collapses to one space.
	escaped := strings.NewReplacer(
		"\r\n", " ",
		"\n", " ",
		"\r", " ",
		`\`, `\\`,
		`"`, `\"`,
	).Replace(value)
	fmt.Fprintf(b, "%s: \"%s\"\n", key, escaped)
}
|
|
|
|
func renderBlocks(b *strings.Builder, pageID string, blocks []store.Block) {
|
|
children := map[string][]store.Block{}
|
|
for _, block := range blocks {
|
|
if block.ID == pageID {
|
|
continue
|
|
}
|
|
parent := block.ParentID
|
|
children[parent] = append(children[parent], block)
|
|
}
|
|
for parent := range children {
|
|
sort.SliceStable(children[parent], func(i, j int) bool {
|
|
a, z := children[parent][i], children[parent][j]
|
|
if a.DisplayOrder != z.DisplayOrder {
|
|
return a.DisplayOrder < z.DisplayOrder
|
|
}
|
|
if a.CreatedTime == z.CreatedTime {
|
|
return a.ID < z.ID
|
|
}
|
|
return a.CreatedTime < z.CreatedTime
|
|
})
|
|
}
|
|
renderChildren(b, pageID, children, 0)
|
|
if len(children[pageID]) == 0 {
|
|
var loose []store.Block
|
|
for _, block := range blocks {
|
|
if block.ID != pageID && block.ParentID != pageID {
|
|
loose = append(loose, block)
|
|
}
|
|
}
|
|
for _, block := range loose {
|
|
renderBlock(b, block, 0)
|
|
}
|
|
}
|
|
}
|
|
|
|
func renderChildren(b *strings.Builder, parentID string, children map[string][]store.Block, depth int) {
|
|
for _, block := range children[parentID] {
|
|
renderBlock(b, block, depth)
|
|
renderChildren(b, block.ID, children, depth+1)
|
|
}
|
|
}
|
|
|
|
func renderBlock(b *strings.Builder, block store.Block, depth int) {
|
|
text := notiontext.MarkdownEscape(block.Text)
|
|
indent := strings.Repeat(" ", depth)
|
|
switch block.Type {
|
|
case "header", "heading_1":
|
|
writeLine(b, "# "+text)
|
|
case "sub_header", "heading_2":
|
|
writeLine(b, "## "+text)
|
|
case "sub_sub_header", "heading_3":
|
|
writeLine(b, "### "+text)
|
|
case "bulleted_list", "bulleted_list_item":
|
|
writeLine(b, indent+"- "+fallback(text, block.Type))
|
|
case "numbered_list", "numbered_list_item":
|
|
writeLine(b, indent+"1. "+fallback(text, block.Type))
|
|
case "to_do", "to_do_item":
|
|
writeLine(b, indent+"- [ ] "+fallback(text, block.Type))
|
|
case "quote":
|
|
writeLine(b, "> "+fallback(text, block.Type))
|
|
case "code":
|
|
b.WriteString("```text\n")
|
|
b.WriteString(text)
|
|
b.WriteString("\n```\n\n")
|
|
case "divider":
|
|
writeLine(b, "---")
|
|
case "image", "file", "pdf", "video", "figma", "drive":
|
|
writeLine(b, fmt.Sprintf("[%s: %s]", block.Type, fallback(text, block.ID)))
|
|
case "column", "column_list", "table", "table_row", "collection_view":
|
|
if text != "" {
|
|
writeLine(b, text)
|
|
}
|
|
default:
|
|
if text != "" {
|
|
writeLine(b, text)
|
|
} else if block.Type != "" {
|
|
writeLine(b, fmt.Sprintf("[%s]", block.Type))
|
|
}
|
|
}
|
|
}
|
|
|
|
// writeLine appends line to b followed by a blank line, after stripping
// trailing spaces. A line that is empty once stripped writes nothing.
func writeLine(b *strings.Builder, line string) {
	if trimmed := strings.TrimRight(line, " "); trimmed != "" {
		b.WriteString(trimmed + "\n\n")
	}
}
|
|
|
|
// fallback returns s unchanged unless it is empty or whitespace-only, in
// which case the fallback value is returned instead.
func fallback(s, fallback string) string {
	if strings.TrimSpace(s) == "" {
		return fallback
	}
	return s
}
|
|
|
|
func pruneStaleMarkdown(root string, keep map[string]bool) error {
|
|
var dirs []string
|
|
if err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
path = filepath.Clean(path)
|
|
if d.IsDir() {
|
|
if path != filepath.Clean(root) {
|
|
dirs = append(dirs, path)
|
|
}
|
|
return nil
|
|
}
|
|
if filepath.Ext(path) == ".md" && !keep[path] {
|
|
return os.Remove(path)
|
|
}
|
|
return nil
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
sort.Slice(dirs, func(i, j int) bool {
|
|
return len(dirs[i]) > len(dirs[j])
|
|
})
|
|
for _, dir := range dirs {
|
|
if err := os.Remove(dir); err != nil && !isIgnorableRemoveDirError(err) {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isIgnorableRemoveDirError reports whether err matches one of the errors
// that pruning tolerates when removing a directory: os.ErrNotExist,
// syscall.ENOTEMPTY or syscall.EEXIST.
func isIgnorableRemoveDirError(err error) bool {
	tolerated := [...]error{os.ErrNotExist, syscall.ENOTEMPTY, syscall.EEXIST}
	for _, target := range tolerated {
		if errors.Is(err, target) {
			return true
		}
	}
	return false
}
|
|
|
|
// formatMS renders a Unix timestamp given in milliseconds as an RFC 3339
// string in UTC. Zero and negative values yield the empty string.
func formatMS(ms int64) string {
	if ms > 0 {
		return time.UnixMilli(ms).In(time.UTC).Format(time.RFC3339)
	}
	return ""
}
|
|
|
|
// maxSlug limits s to at most max bytes. A slug that already fits is returned
// unchanged. Otherwise it is cut at the last UTF-8 rune boundary at or before
// max, so a multi-byte character is never split in half, trailing "-"
// separators left by the cut are dropped, and "untitled" is returned if
// nothing remains. A negative max is treated as 0.
func maxSlug(s string, max int) string {
	if len(s) <= max {
		return s
	}
	cut := max
	if cut < 0 {
		cut = 0
	}
	// len(s) > cut here, so s[cut] is in range. Bytes of the form 10xxxxxx
	// are UTF-8 continuation bytes; back up until the cut lands on the first
	// byte of a rune.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	s = strings.TrimRight(s[:cut], "-")
	if s == "" {
		return "untitled"
	}
	return s
}
|