Compare commits

...

72 Commits

Author SHA1 Message Date
Vincent Koc
e061bf481e
docs: document ci coverage
Some checks failed
CI / release-check (push) Has been cancelled
CodeQL / analyze (push) Has been cancelled
CI / deps (push) Has been cancelled
CI / lint (push) Has been cancelled
CI / test (push) Has been cancelled
2026-05-05 19:29:38 -07:00
Vincent Koc
f98087df38
ci: add codeql and control gates 2026-05-05 19:29:37 -07:00
Vincent Koc
a41c39aa5c
docs: document crawlkit integration 2026-05-05 19:16:52 -07:00
Vincent Koc
1102a0e3d7
chore(deps): use crawlkit v0.4.0
Some checks failed
Validation / validate (push) Has been cancelled
2026-05-05 02:12:57 -07:00
Vincent Koc
9c74c041f8
fix(cli): keep notcrawl help side-effect free 2026-05-04 04:06:13 -07:00
Vincent Koc
e3265a2003
fix(tui): keep notion databases visible 2026-05-04 01:35:52 -07:00
Vincent Koc
1d531c7a3b
docs: note notion parent resolution 2026-05-04 00:44:01 -07:00
Vincent Koc
e8cd9d36e7
fix(tui): resolve notion block parents 2026-05-04 00:43:34 -07:00
Vincent Koc
449015e3db
fix(tui): collapse noisy notion link runs 2026-05-03 23:29:15 -07:00
Vincent Koc
fcaa59ad8c
fix(tui): compact repeated notion links 2026-05-03 23:24:04 -07:00
Vincent Koc
54a8c48b37
fix(tui): preserve readable notion mention labels 2026-05-03 23:22:37 -07:00
Vincent Koc
512e448b93
fix(tui): scrub notion mention artifacts 2026-05-03 23:19:58 -07:00
Vincent Koc
626f1ccdb4
fix(tui): clean notion preview markers 2026-05-03 23:02:08 -07:00
Vincent Koc
8d20f10b6b
fix(tui): order notion previews by display tree 2026-05-03 22:47:29 -07:00
Vincent Koc
3ab777fa25
docs(tui): note notion comments in previews 2026-05-03 15:49:59 -07:00
Vincent Koc
5e50da9670
fix(tui): include notion comments in previews 2026-05-03 15:49:36 -07:00
Vincent Koc
6df5495445
docs(tui): note richer notion previews 2026-05-03 15:45:36 -07:00
Vincent Koc
6c77599d6b
fix(tui): enrich notion page previews 2026-05-03 15:43:57 -07:00
Vincent Koc
7c820f2404
docs(tui): note workspace parent cleanup 2026-05-03 10:34:01 -07:00
Vincent Koc
7a0804de0d
fix(tui): normalize notion workspace parents 2026-05-03 10:29:20 -07:00
Vincent Koc
39711670eb
feat(tui): refresh notion archive rows 2026-05-03 09:54:43 -07:00
Vincent Koc
f987bfbcc4
docs: note shared tui polish 2026-05-03 09:13:12 -07:00
Vincent Koc
cfe461ab28
docs: note tui parent cleanup 2026-05-03 04:09:27 -07:00
Vincent Koc
d72298d10d
fix(tui): hide noisy notion parents 2026-05-03 04:06:03 -07:00
Vincent Koc
b0eb9c6500
fix(tui): hide raw notion parent ids 2026-05-03 03:23:24 -07:00
Vincent Koc
a36c49fe2f
fix(tui): document shared controls 2026-05-03 03:10:17 -07:00
Vincent Koc
d7460b028b
fix(tui): expose notion preview details 2026-05-03 02:57:43 -07:00
Vincent Koc
ae2bb3c032
docs(tui): note notion parent labels 2026-05-03 00:32:29 -07:00
Vincent Koc
4536693bc2
fix(tui): resolve notion database parents 2026-05-03 00:31:04 -07:00
Vincent Koc
3c5ea37699
fix(tui): use compact-pane crawlkit 2026-05-03 00:14:32 -07:00
Vincent Koc
57955f7ce5
fix(tui): show notion page previews 2026-05-03 00:07:05 -07:00
Vincent Koc
5ed2877778
fix(sync): log progress percentages 2026-05-02 19:34:14 -07:00
Vincent Koc
f7d3116ef5
fix(share): preserve notcrawl update semantics 2026-05-02 17:03:31 -07:00
Vincent Koc
209856001e
fix(tui): resolve notion archive labels 2026-05-02 15:13:30 -07:00
Vincent Koc
3ed073a6f1
chore(deps): bump crawlkit to v0.3.12 2026-05-02 14:55:14 -07:00
Vincent Koc
29f30b9520
fix(release): stamp notcrawl version 2026-05-02 14:54:57 -07:00
Vincent Koc
0a5e72c291
fix(share): refresh publish origin 2026-05-02 14:53:59 -07:00
Vincent Koc
021747643c
chore(deps): update crawlkit to v0.3.11 2026-05-02 11:42:43 -07:00
Vincent Koc
f8fec210b5
fix(cli): make tui help config-free 2026-05-02 10:42:47 -07:00
Vincent Koc
338bc82bc9
chore(deps): update crawlkit to v0.3.10 2026-05-02 10:21:39 -07:00
Vincent Koc
0b3438d707
chore(deps): update crawlkit to v0.3.9 2026-05-02 10:04:02 -07:00
Vincent Koc
1841099e6a
chore(deps): update crawlkit to v0.3.8 2026-05-02 09:44:39 -07:00
Vincent Koc
44295f7d99
docs(changelog): note TUI pane polish 2026-05-02 09:36:25 -07:00
Vincent Koc
3c61600aaa
chore(deps): update crawlkit to v0.3.7 2026-05-02 09:25:05 -07:00
Vincent Koc
03f8e18710
feat(tui): use shared pane browser 2026-05-02 09:04:42 -07:00
Vincent Koc
e66c54dd47
chore(deps): update crawlkit to v0.3.5 2026-05-02 07:52:49 -07:00
Vincent Koc
f3b59ff4fe
fix(tui): use crawlkit empty-json fix 2026-05-02 07:46:16 -07:00
Vincent Koc
35ad15a2e6
fix(tui): use crawlkit safe renderer 2026-05-02 07:40:12 -07:00
Vincent Koc
261b82e104
ci: smoke crawlkit control surface 2026-05-01 16:10:13 -07:00
Vincent Koc
2f8be8ed0a
feat(cli): add crawlkit control surface 2026-05-01 16:09:37 -07:00
Vincent Koc
14a7743115
feat(tui): use universal archive rows 2026-05-01 13:25:04 -07:00
Vincent Koc
66f3704f71
chore: tidy crawlkit module sums 2026-05-01 12:42:57 -07:00
Vincent Koc
d10b860f25
refactor: use crawlkit package nouns 2026-05-01 12:39:11 -07:00
Vincent Koc
eca7465212
chore: use crawlkit v0.2.0 2026-05-01 10:43:16 -07:00
Vincent Koc
ece68216e8
docs(tui): document notcrawl terminal browser 2026-05-01 10:36:50 -07:00
Vincent Koc
a0298d839e
test(tui): cover notcrawl browser rows 2026-05-01 10:23:07 -07:00
Vincent Koc
c16a627be7
feat(tui): add archive browser command 2026-05-01 10:22:37 -07:00
Vincent Koc
2e7a62abfe
chore: use crawlkit v0.1.1 2026-05-01 09:50:50 -07:00
Vincent Koc
a27a70f824
chore: use crawlkit v0.1.0 2026-05-01 09:35:00 -07:00
Vincent Koc
3459ddf5a1
refactor(share): use crawlkit gitshare helpers 2026-05-01 09:14:17 -07:00
Vincent Koc
6ea5ca8f6f
refactor(store): use crawlkit sqlite opener 2026-05-01 08:57:18 -07:00
Vincent Koc
2f778d8528
refactor(config): use crawlkit TOML helpers 2026-05-01 08:52:02 -07:00
Vincent Koc
b113a16d5b
chore: add crawlkit module dependency 2026-05-01 08:47:58 -07:00
Vincent Koc
e280c7f350
fix(release): generate auditable Homebrew formula
Some checks failed
Release Drafter / autolabel (push) Has been cancelled
Release Drafter / Update Release Draft (push) Has been cancelled
Validation / validate (push) Has been cancelled
2026-04-29 13:06:57 -07:00
Vincent Koc
b687296e43
fix(release): validate Homebrew tap push token 2026-04-29 13:02:21 -07:00
Vincent Koc
a88136abad
fix(release): publish formula to Homebrew tap (#26)
Fixes #25
2026-04-29 12:27:26 -07:00
Vincent Koc
fc30106579
fix(export): clean emoji from filenames (#24) 2026-04-29 05:13:19 -07:00
Vincent Koc
bd4b1977b6
fix(export): render desktop database CSVs correctly (#23) 2026-04-29 05:03:43 -07:00
Vincent Koc
ea691828c6
fix(export): add bulk database export (#22) 2026-04-29 04:32:52 -07:00
Vincent Koc
2dcf13a432
fix(api): retry transient Notion failures (#21) 2026-04-29 04:17:02 -07:00
Vincent Koc
091db7b53d
fix(api): dedupe Notion rich text titles (#20)
Fixes #19
2026-04-29 04:08:34 -07:00
Vincent Koc
d233112b04
ci(release): update goreleaser action
Update GoReleaser action to v7 for the release workflow.
2026-04-27 14:15:30 -07:00
27 changed files with 2542 additions and 319 deletions

128
.github/workflows/ci.yml vendored Normal file
View File

@ -0,0 +1,128 @@
name: CI
on:
pull_request:
push:
branches:
- main
- feat/use-crawlkit
workflow_dispatch:
permissions:
contents: read
concurrency:
group: ci-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
jobs:
deps:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Verify module cache
run: go mod verify
- name: Check go.mod tidy
run: |
go mod tidy
git diff --exit-code -- go.mod go.sum
- name: Install govulncheck
run: go install golang.org/x/vuln/cmd/govulncheck@v1.3.0
- name: Run govulncheck
run: '"$(go env GOPATH)/bin/govulncheck" ./...'
lint:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Check formatting
run: |
changed="$(gofmt -l .)"
if [ -n "$changed" ]; then
printf 'gofmt wants changes in:\n%s\n' "$changed"
exit 1
fi
- name: Vet
run: go vet ./...
test:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Test
run: go test -count=1 ./...
- name: Build CLI
run: go build -ldflags "-X main.version=ci" -o bin/notcrawl ./cmd/notcrawl
- name: Smoke test CLI control surface
run: |
set -euo pipefail
output="$(./bin/notcrawl --help 2>&1)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "Usage of notcrawl:"
printf '%s' "$output" | grep -q "metadata"
printf '%s' "$output" | grep -q "tui"
test "$(./bin/notcrawl --version)" = "ci"
./bin/notcrawl metadata --json | grep -q '"schema_version"'
cfg="$RUNNER_TEMP/notcrawl.toml"
db="$RUNNER_TEMP/notcrawl.db"
./bin/notcrawl --config "$cfg" init
./bin/notcrawl --config "$cfg" --db "$db" status --json | grep -q '"databases"'
./bin/notcrawl --config "$cfg" --db "$db" tui --json --limit 1 | grep -q '^\['
release-check:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Snapshot release build
uses: goreleaser/goreleaser-action@v7.2.1
with:
distribution: goreleaser
version: "~> v2"
args: release --snapshot --clean --skip=publish

41
.github/workflows/codeql.yml vendored Normal file
View File

@ -0,0 +1,41 @@
name: CodeQL
on:
pull_request:
push:
branches:
- main
- feat/use-crawlkit
schedule:
- cron: "37 4 * * 1"
workflow_dispatch:
permissions:
actions: read
contents: read
security-events: write
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
jobs:
analyze:
name: analyze
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: go
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4

View File

@ -27,19 +27,27 @@ jobs:
update-tap:
runs-on: ubuntu-latest
if: startsWith(github.event.release.tag_name || inputs.tag_name, 'v')
env:
TAP_REPO: ${{ vars.HOMEBREW_TAP_REPO || 'vincentkoc/homebrew-tap' }}
steps:
- name: Validate tap configuration
env:
GH_TOKEN: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}
run: |
set -euo pipefail
if [ -z "${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}" ]; then
if [ -z "${GH_TOKEN}" ]; then
echo "Secret HOMEBREW_TAP_GITHUB_TOKEN is required."
exit 1
fi
if [ "$(gh api "repos/${TAP_REPO}" --jq '.permissions.push // false')" != "true" ]; then
echo "HOMEBREW_TAP_GITHUB_TOKEN must have push access to ${TAP_REPO}."
exit 1
fi
- name: Checkout tap repository
uses: actions/checkout@v5
with:
repository: ${{ vars.HOMEBREW_TAP_REPO || 'vincentkoc/tap' }}
repository: ${{ env.TAP_REPO }}
token: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }}
- name: Update formula
@ -48,7 +56,6 @@ jobs:
SOURCE_REPO: ${{ github.repository }}
run: |
set -euo pipefail
VERSION="${TAG#v}"
SOURCE_URL="https://github.com/${SOURCE_REPO}/archive/refs/tags/${TAG}.tar.gz"
curl -fsSL "${SOURCE_URL}" -o /tmp/notcrawl-src.tar.gz
@ -62,12 +69,11 @@ jobs:
url "${SOURCE_URL}"
sha256 "${SHA256}"
license "MIT"
version "${VERSION}"
depends_on "go" => :build
def install
system "go", "build", *std_go_args(ldflags: "-s -w"), "./cmd/notcrawl"
system "go", "build", *std_go_args(ldflags: "-s -w -X main.version=#{version}"), "./cmd/notcrawl"
pkgshare.install "config.example.toml"
doc.install "README.md", "LICENSE", "SPEC.md"
end

View File

@ -50,7 +50,7 @@ jobs:
echo "RELEASE_VERSION=${TAG#v}" >> "$GITHUB_ENV"
- name: Build release artifacts
uses: goreleaser/goreleaser-action@v6
uses: goreleaser/goreleaser-action@v7
with:
distribution: goreleaser
version: "~> v2"

View File

@ -1,41 +0,0 @@
name: Validation
on:
push:
branches:
- "**"
pull_request:
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
jobs:
validate:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Format check
run: |
set -euo pipefail
test -z "$(gofmt -l .)"
- name: Run tests
run: go test ./...
- name: Build CLI
run: go build ./cmd/notcrawl
- name: Smoke test CLI help
run: |
set -euo pipefail
output="$(go run ./cmd/notcrawl --help 2>&1)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "Usage of notcrawl:"

View File

@ -17,7 +17,7 @@ builds:
flags:
- -trimpath
ldflags:
- -s -w
- -s -w -X main.version={{ .Version }}
archives:
- id: bundles

29
CHANGELOG.md Normal file
View File

@ -0,0 +1,29 @@
# Changelog
## Unreleased
- Replace the single validation workflow with CI jobs for dependencies,
formatting/vet, tests, CLI control-surface smoke checks, and GoReleaser
snapshot builds.
- Add CodeQL analysis on pull requests, `main`, the crawlkit integration branch,
weekly schedule, and manual dispatch.
- Depend on `github.com/vincentkoc/crawlkit v0.4.0` for shared config,
status/control, snapshot, mirror, output, and terminal explorer mechanics.
- Keep Notion API/Desktop parsing, Markdown rendering, page/comment/database
schemas, Notion FTS body construction, and data-source compatibility
app-owned while the shared mechanics move to crawlkit.
- Document the gitcrawl-style document TUI shape: workspace/teamspace/page or
database groups, page/database rows, preview/comment detail, sorting, mouse
selection, right-click actions, and local/remote status chrome.
- Add crawlkit control metadata/status surfaces with `metadata --json`, `status --json`, and `doctor --json`.
- Report primary archive and desktop-cache SQLite inventories in status JSON for shared local control surfaces.
- Add `notcrawl tui`, a local terminal browser for archived pages and databases backed by `crawlkit/tui`.
- Render TUI rows with compact panes so page and database metadata stays in context/detail instead of crowding the row list.
- Resolve database parent names for the TUI parent pane so collection nesting is readable instead of raw IDs.
- Hide noisy block-derived Notion parent labels in the TUI by falling back to the workspace label when parent text contains raw Notion identifiers.
- Resolve block-parent pages to their owning page when possible so the TUI parent pane shows real Notion hierarchy instead of broad workspace buckets.
- Normalize workspace-level Notion parents as `Workspace: <name>` so the TUI left pane does not split the same workspace into duplicate parent groups.
- Inherit shared crawlkit TUI improvements for newest-first startup, count-header sorting, preview-first document detail panes, and gitcrawl-style metadata labels.
- Feed longer, block-shaped Notion page previews into the TUI detail pane so pages read more like documents instead of flat metadata.
- Include page comments in Notion TUI previews after block content.
- Route the TUI through read-only SQLite access and cover the JSON fallback in tests.

View File

@ -25,6 +25,7 @@ to without holding Notion credentials.
- normalized Markdown export organized by Unicode-safe workspace, teamspace, and page paths
- CSV/TSV export for crawled Notion database rows
- compressed JSONL git-share snapshots plus import/update workflows
- terminal archive browser for quick local page/database inspection
- archive status, activity reporting, and SQLite maintenance commands
- read-only SQL access for ad hoc inspection
@ -50,6 +51,7 @@ notcrawl report
notcrawl sync --source desktop
notcrawl export-md
notcrawl search "launch plan"
notcrawl tui
```
Or use the official Notion API:
@ -59,6 +61,7 @@ export NOTION_TOKEN="secret_..."
notcrawl sync --source api
notcrawl databases
notcrawl export-db --database DATABASE_ID --format csv --output roadmap.csv
notcrawl export-db --all --dir exports/csv
```
Default paths:
@ -74,18 +77,35 @@ Default paths:
- `init` writes a starter config
- `doctor` checks config, SQLite, desktop cache, and token presence
- `status` prints archive counts, last sync time, and database/WAL size
- `metadata --json`, `status --json`, and `doctor --json` expose crawlkit
control/status payloads for launchers, automation, and CI
- `report` summarizes recent page, database, space, and comment activity
- `maintain` rebuilds FTS, optimizes SQLite indexes, and can run `VACUUM`
- `sync` ingests from `desktop`, `api`, or `all`
- `export-md` renders normalized Markdown files from SQLite
- `databases` lists crawled Notion databases
- `export-db` exports a crawled Notion database to CSV or TSV
- `export-db` exports one crawled Notion database, or all databases with `--all --dir`, to CSV or TSV
- `search` searches page and comment text through FTS5
- `tui` opens the terminal archive browser for pages and databases
- `sql` runs read-only SQL against the archive
- `publish` exports SQLite tables and Markdown into a git share repo
- `subscribe` clones a share repo and imports the latest snapshot
- `update` pulls and imports a subscribed share repo
## Shared crawlkit surfaces
`notcrawl` uses `crawlkit` for standard config paths, SQLite open/read helpers,
snapshot packing/import, git-backed archive sharing, output formatting, status
payloads, and the shared terminal explorer. Notion API/Desktop parsing,
Markdown rendering, page/comment/database schemas, and Notion FTS bodies remain
owned by `notcrawl`.
The TUI follows the gitcrawl-style three-pane model: workspace/teamspace/page or
database groups on the left, pages/databases in the middle, and a readable
document preview plus comments and metadata on the right. It supports pane
focus, sortable headers, mouse selection, right-click actions, and a
local/remote footer.
## Distribution
Release packaging is managed with GoReleaser. Tagged releases build tarballs,

View File

@ -163,6 +163,7 @@ those pages through `pages.collection_id`.
```text
notcrawl export-db --database <database-id> --format csv --output rows.csv
notcrawl export-db --database <database-id> --format tsv --output rows.tsv
notcrawl export-db --all --dir exports/csv
```
The first columns are stable metadata:

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,18 @@
package main
import "testing"
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/vincentkoc/notcrawl/internal/store"
)
func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
got := searchField("line one\nline\ttwo line three")
@ -8,3 +20,431 @@ func TestSearchFieldCollapsesRecordSeparators(t *testing.T) {
t.Fatalf("unexpected field: %q", got)
}
}
// TestTUIJSONListsArchiveRowsWithoutMutation seeds an archive with one
// collection, one page, and one block, runs `tui --json` against it, and then
// checks two things: the first emitted row describes the seeded page, and the
// SQLite file is byte-for-byte identical before and after the command.
func TestTUIJSONListsArchiveRowsWithoutMutation(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	dbPath := filepath.Join(tmp, "notcrawl.db")

	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	now := store.NowMS()

	roadmap := store.Collection{ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now}
	if err := st.UpsertCollection(ctx, roadmap); err != nil {
		t.Fatal(err)
	}
	launch := store.Page{
		ID:             "page1",
		CollectionID:   "db1",
		Title:          "Launch Plan",
		URL:            "https://example.com/launch",
		Alive:          true,
		Source:         "test",
		SyncedAt:       now,
		LastEditedTime: now,
	}
	if err := st.UpsertPage(ctx, launch); err != nil {
		t.Fatal(err)
	}
	checklist := store.Block{
		ID:           "block1",
		PageID:       "page1",
		ParentID:     "page1",
		Type:         "bulleted_list",
		Text:         "sync launch checklist",
		DisplayOrder: 1,
		Alive:        true,
		Source:       "test",
		SyncedAt:     now,
	}
	if err := st.UpsertBlock(ctx, checklist); err != nil {
		t.Fatal(err)
	}
	if err := st.Close(); err != nil {
		t.Fatal(err)
	}

	// Snapshot the database file so any write by the command is detectable.
	before, err := os.ReadFile(dbPath)
	if err != nil {
		t.Fatal(err)
	}

	args := []string{"--config", filepath.Join(tmp, "missing.toml"), "--db", dbPath, "tui", "--json"}
	var stdout, stderr bytes.Buffer
	if err := run(ctx, args, &stdout, &stderr); err != nil {
		t.Fatalf("tui --json failed: %v\nstderr:\n%s", err, stderr.String())
	}

	var rows []map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &rows); err != nil {
		t.Fatalf("invalid json: %v\n%s", err, stdout.String())
	}
	// The length check comes first so rows[0] is never evaluated on an empty
	// result.
	firstRowOK := len(rows) > 0 &&
		rows[0]["title"] == "Launch Plan" &&
		rows[0]["source"] == "notion" &&
		rows[0]["kind"] == "page" &&
		rows[0]["container"] == "Roadmap" &&
		strings.Contains(fmt.Sprint(rows[0]["text"]), "sync launch checklist") &&
		strings.Contains(fmt.Sprint(rows[0]["detail"]), "sync launch checklist")
	if !firstRowOK {
		t.Fatalf("unexpected rows: %#v", rows)
	}

	after, err := os.ReadFile(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	if !bytes.Equal(before, after) {
		t.Fatal("tui --json mutated the sqlite database")
	}
}
// TestTUIAllRowsIncludesDatabasesWhenPagesHitLimit stores one collection with
// two pages, runs `tui --json --limit 1`, and requires that the JSON output
// still contains at least one row of kind "page" and one of kind "database".
func TestTUIAllRowsIncludesDatabasesWhenPagesHitLimit(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	dbPath := filepath.Join(tmp, "notcrawl.db")

	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	now := store.NowMS()

	roadmap := store.Collection{ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now}
	if err := st.UpsertCollection(ctx, roadmap); err != nil {
		t.Fatal(err)
	}
	// Each page uses its title as its ID as well.
	titles := []string{"Launch Plan", "Backlog"}
	for i := range titles {
		page := store.Page{ID: titles[i], CollectionID: "db1", Title: titles[i], Alive: true, Source: "test", SyncedAt: now, LastEditedTime: now}
		if err := st.UpsertPage(ctx, page); err != nil {
			t.Fatal(err)
		}
	}
	if err := st.Close(); err != nil {
		t.Fatal(err)
	}

	args := []string{"--config", filepath.Join(tmp, "missing.toml"), "--db", dbPath, "tui", "--json", "--limit", "1"}
	var stdout, stderr bytes.Buffer
	if err := run(ctx, args, &stdout, &stderr); err != nil {
		t.Fatalf("tui --json failed: %v\nstderr:\n%s", err, stderr.String())
	}

	var rows []map[string]any
	if err := json.Unmarshal(stdout.Bytes(), &rows); err != nil {
		t.Fatalf("invalid json: %v\n%s", err, stdout.String())
	}

	var sawPage, sawDatabase bool
	for _, row := range rows {
		switch fmt.Sprint(row["kind"]) {
		case "page":
			sawPage = true
		case "database":
			sawDatabase = true
		}
	}
	if !(sawPage && sawDatabase) {
		t.Fatalf("all rows should include pages and databases despite page limit: %#v", rows)
	}
}
func TestCollectionTUIRowsResolveParentCollectionNames(t *testing.T) {
rows := collectionTUIRows([]store.Collection{{
ID: "child-db",
SpaceID: "space1",
ParentID: "parent-db",
ParentTable: "collection",
Name: "Child",
Source: "test",
}}, 10, nil, map[string]string{"parent-db": "Parent Database"}, map[string]string{"space1": "Workspace"})
if len(rows) != 1 {
t.Fatalf("rows = %#v", rows)
}
if rows[0].ParentID != "Parent Database" {
t.Fatalf("parent label = %q", rows[0].ParentID)
}
if rows[0].Scope != "Workspace" {
t.Fatalf("scope = %q", rows[0].Scope)
}
if !strings.Contains(rows[0].Detail, "Parent: Parent Database") {
t.Fatalf("detail = %q", rows[0].Detail)
}
}
// TestTUIRowsHideRawNotionParentIDs checks that pageTUIRows labels a page's
// parent as "Workspace: Comet.com" rather than exposing a raw identifier, in
// two situations:
//
//  1. the parent is a space referenced by a "space:"-prefixed UUID, and
//  2. the parent is a block referenced by a bare UUID with no label available.
func TestTUIRowsHideRawNotionParentIDs(t *testing.T) {
	rows := pageTUIRows([]store.Page{{
		ID:             "page1",
		SpaceID:        "space1",
		ParentID:       "space:00b8cbcf-c520-4790-999a-9c2940263721",
		ParentTable:    "space",
		CollectionID:   "",
		Title:          "Launch Plan",
		Alive:          true,
		Source:         "test",
		LastEditedTime: 1000,
	}}, 10, nil, nil, map[string]string{"space1": "Comet.com", "00b8cbcf-c520-4790-999a-9c2940263721": "Comet.com"}, nil, nil)
	if len(rows) != 1 {
		t.Fatalf("rows = %#v", rows)
	}
	if rows[0].ParentID != "Workspace: Comet.com" {
		t.Fatalf("parent label = %q", rows[0].ParentID)
	}

	rows = pageTUIRows([]store.Page{{
		ID:          "page2",
		SpaceID:     "space1",
		ParentID:    "330b54b1-d7cc-4cd7-96bc-4d705b5f37bf",
		ParentTable: "block",
		Title:       "Nested",
		Alive:       true,
		Source:      "test",
	}}, 10, nil, nil, map[string]string{"space1": "Comet.com"}, nil, nil)
	// Guard the index below: without this, an empty result would panic with an
	// index-out-of-range instead of reporting a readable test failure.
	if len(rows) != 1 {
		t.Fatalf("rows = %#v", rows)
	}
	if rows[0].ParentID != "Workspace: Comet.com" {
		t.Fatalf("workspace fallback parent = %q", rows[0].ParentID)
	}
}
func TestTUIRowsHideNoisyNotionBlockParentLabels(t *testing.T) {
rows := pageTUIRows([]store.Page{{
ID: "page1",
SpaceID: "space1",
ParentID: "block1",
ParentTable: "block",
Title: "Child",
Alive: true,
Source: "test",
}}, 10, map[string]string{
"block1": "ce 2fd71240-10a3-80a0-a65a-007aec07c0d9 00b8cbcf-c520-4790-999a-9c2940263721 Pods",
}, nil, map[string]string{"space1": "Comet.com"}, nil, nil)
if len(rows) != 1 {
t.Fatalf("rows = %#v", rows)
}
if rows[0].ParentID != "Workspace: Comet.com" {
t.Fatalf("noisy parent label = %q", rows[0].ParentID)
}
}
func TestTUIRowsResolveBlockParentToOwningPage(t *testing.T) {
rows := pageTUIRows([]store.Page{{
ID: "page1",
SpaceID: "space1",
ParentID: "block-child",
ParentTable: "block",
Title: "Nested",
Alive: true,
Source: "test",
}}, 10, map[string]string{
"parent-page": "Customer Folder",
}, nil, map[string]string{"space1": "Comet.com"}, map[string]store.ParentRef{
"block-child": {ID: "block-parent", Table: "block"},
"block-parent": {ID: "parent-page", Table: "page"},
}, nil)
if len(rows) != 1 {
t.Fatalf("rows = %#v", rows)
}
if rows[0].ParentID != "Customer Folder" {
t.Fatalf("resolved parent label = %q", rows[0].ParentID)
}
}
func TestBlockPreviewKeepsNotionPageShape(t *testing.T) {
blocks := []store.Block{
{Type: "heading_1", Text: "Launch Plan"},
{Type: "bulleted_list", Text: "ship tui"},
{Type: "to_do", Text: "verify local binary"},
{Type: "numbered_list", Text: "open terminal"},
{Type: "quote", Text: "keep it readable"},
{Type: "code", Text: "notcrawl tui"},
}
got := blockPreview(blocks, tuiPagePreviewMax)
for _, want := range []string{"# Launch Plan", "- ship tui", "- [ ] verify local binary", "1. open terminal", "> keep it readable", " notcrawl tui"} {
if !strings.Contains(got, want) {
t.Fatalf("preview missing %q:\n%s", want, got)
}
}
}
func TestBlockPreviewCleansLegacyNotionMarkers(t *testing.T) {
got := blockPreview([]store.Block{
{Type: "paragraph", Text: "Option A: b"},
{Type: "paragraph", Text: "Marketing Customer Reference Rights a https://example.com/sheet"},
}, tuiPagePreviewMax)
if strings.Contains(got, " a https://") || strings.Contains(got, ": b") {
t.Fatalf("preview leaked legacy markers:\n%s", got)
}
for _, want := range []string{"Option A:", "Marketing Customer Reference Rights <https://example.com/sheet>"} {
if !strings.Contains(got, want) {
t.Fatalf("preview missing %q:\n%s", want, got)
}
}
}
func TestBlockPreviewCompactsRepeatedLinkedPages(t *testing.T) {
got := blockPreview([]store.Block{{
Type: "paragraph",
Text: "linked page, linked page, linked page Add details",
}}, tuiPagePreviewMax)
if got != "linked pages Add details" {
t.Fatalf("got %q", got)
}
}
func TestPagePreviewIncludesComments(t *testing.T) {
got := pagePreview(
[]store.Block{{Type: "paragraph", Text: "status update"}},
[]store.Comment{{Text: "looks good"}, {Text: "ship it"}},
tuiPagePreviewMax,
)
for _, want := range []string{"status update", "## Comments", "- looks good", "- ship it"} {
if !strings.Contains(got, want) {
t.Fatalf("page preview missing %q:\n%s", want, got)
}
}
}
// TestHelpMentionsTUI runs the CLI with `--help` and checks that the text
// written to stdout mentions "tui".
func TestHelpMentionsTUI(t *testing.T) {
	var stdout, stderr bytes.Buffer
	err := run(context.Background(), []string{"--help"}, &stdout, &stderr)
	if err != nil {
		t.Fatal(err)
	}
	help := stdout.String()
	if !strings.Contains(help, "tui") {
		t.Fatalf("help missing tui command:\n%s", help)
	}
}
// TestHelpAfterGlobalFlagsHasNoSideEffects runs `--config ... --db ... --help`
// and checks that usage text goes to stdout, nothing is written to stderr, and
// the config path still does not exist afterwards.
func TestHelpAfterGlobalFlagsHasNoSideEffects(t *testing.T) {
	tmp := t.TempDir()
	configPath := filepath.Join(tmp, "config.toml")
	args := []string{"--config", configPath, "--db", filepath.Join(tmp, "notcrawl.db"), "--help"}

	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), args, &stdout, &stderr); err != nil {
		t.Fatal(err)
	}

	for _, want := range []string{"Usage of notcrawl:", "tui"} {
		if !strings.Contains(stdout.String(), want) {
			t.Fatalf("help missing usage:\n%s", stdout.String())
		}
	}
	if errOut := stderr.String(); errOut != "" {
		t.Fatalf("unexpected stderr:\n%s", errOut)
	}
	// os.Stat must report "does not exist"; any other outcome means help
	// touched the config path.
	_, statErr := os.Stat(configPath)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("help should not write config, stat err=%v", statErr)
	}
}
// TestInitHelpDoesNotWriteConfig runs `--config <path> init --help` and checks
// that the init usage text is printed to stdout, stderr stays empty, and no
// file appears at the config path.
func TestInitHelpDoesNotWriteConfig(t *testing.T) {
	configPath := filepath.Join(t.TempDir(), "config.toml")
	args := []string{"--config", configPath, "init", "--help"}

	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), args, &stdout, &stderr); err != nil {
		t.Fatal(err)
	}

	usage := stdout.String()
	if !strings.Contains(usage, "Usage of init:") {
		t.Fatalf("init help missing usage:\n%s", usage)
	}
	if errOut := stderr.String(); errOut != "" {
		t.Fatalf("unexpected stderr:\n%s", errOut)
	}
	_, statErr := os.Stat(configPath)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("init --help should not write config, stat err=%v", statErr)
	}
}
// TestVersionFlagWorksWithOtherGlobalFlags runs `--config <missing> --version`
// and checks that the trimmed stdout equals the package-level version value.
func TestVersionFlagWorksWithOtherGlobalFlags(t *testing.T) {
	args := []string{"--config", filepath.Join(t.TempDir(), "missing.toml"), "--version"}

	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), args, &stdout, &stderr); err != nil {
		t.Fatal(err)
	}

	got := strings.TrimSpace(stdout.String())
	if got == version {
		return
	}
	t.Fatalf("version = %q", got)
}
// TestMetadataDoesNotMarkPlainTextCommandsAsJSON runs the `metadata` command,
// decodes its stdout as a manifest with a per-command "json" flag, and checks
// which commands have that flag set: sync, tap, publish, subscribe, and update
// must not; status, doctor, and tui-json must.
func TestMetadataDoesNotMarkPlainTextCommandsAsJSON(t *testing.T) {
	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), []string{"metadata"}, &stdout, &stderr); err != nil {
		t.Fatal(err)
	}

	var manifest struct {
		Commands map[string]struct {
			JSON bool `json:"json"`
		} `json:"commands"`
	}
	if err := json.Unmarshal(stdout.Bytes(), &manifest); err != nil {
		t.Fatalf("invalid metadata JSON: %v\n%s", err, stdout.String())
	}

	// Plain-text commands are listed first so the first reported failure
	// matches a two-pass check (plain-text pass, then JSON pass).
	expectations := []struct {
		name     string
		wantJSON bool
	}{
		{"sync", false},
		{"tap", false},
		{"publish", false},
		{"subscribe", false},
		{"update", false},
		{"status", true},
		{"doctor", true},
		{"tui-json", true},
	}
	for _, e := range expectations {
		advertised := manifest.Commands[e.name].JSON
		switch {
		case !e.wantJSON && advertised:
			t.Fatalf("%s should not be advertised as JSON", e.name)
		case e.wantJSON && !advertised:
			t.Fatalf("%s should be advertised as JSON", e.name)
		}
	}
}
// TestSyncEmitsProgressPercentToStderr runs `sync --source desktop` against a
// fresh temp directory and checks that stderr contains the expected progress
// log fields (message, finished state, 100% values, and the desktop phase).
func TestSyncEmitsProgressPercentToStderr(t *testing.T) {
	tmp := t.TempDir()
	args := []string{
		"--config", filepath.Join(tmp, "missing.toml"),
		"--db", filepath.Join(tmp, "notcrawl.db"),
		"sync", "--source", "desktop",
	}

	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), args, &stdout, &stderr); err != nil {
		t.Fatalf("sync failed: %v\nstdout:\n%s\nstderr:\n%s", err, stdout.String(), stderr.String())
	}

	logs := stderr.String()
	expected := []string{
		`msg="sync progress"`,
		`state=finished`,
		`percent=100.0`,
		`completion=100.0%`,
		`phase=desktop`,
	}
	for _, want := range expected {
		if strings.Contains(logs, want) {
			continue
		}
		t.Fatalf("missing %q in progress logs:\n%s", want, logs)
	}
}
// TestTUIHelpReturnsUsage runs `tui --help` and checks that stdout carries the
// tui usage banner plus the "-limit", "right-click", and "# jump" help text,
// and that nothing is written to stderr.
func TestTUIHelpReturnsUsage(t *testing.T) {
	var stdout, stderr bytes.Buffer
	if err := run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr); err != nil {
		t.Fatal(err)
	}

	for _, want := range []string{"Usage of tui:", "-limit", "right-click", "# jump"} {
		if !strings.Contains(stdout.String(), want) {
			t.Fatalf("tui help missing usage:\n%s", stdout.String())
		}
	}
	if errOut := stderr.String(); errOut != "" {
		t.Fatalf("unexpected stderr:\n%s", errOut)
	}
}
// TestExportDatabaseAllWritesFilesAndIndex stores two collections (one with
// emoji in its name) and a single page, runs `export-db --all --dir <out>`,
// and checks the summary line on stdout, the presence of one CSV per
// collection plus index.tsv, and the header and first data row of the index.
func TestExportDatabaseAllWritesFilesAndIndex(t *testing.T) {
	ctx := context.Background()
	tmp := t.TempDir()
	dbPath := filepath.Join(tmp, "notcrawl.db")

	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	now := store.NowMS()

	collections := []store.Collection{
		{ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now, SchemaJSON: `{"Name":{"type":"title"}}`},
		{ID: "db2", Name: "Launch 🚀 Plan ✅", Source: "test", SyncedAt: now, SchemaJSON: `{"Task":{"type":"title"}}`},
	}
	for _, c := range collections {
		if err := st.UpsertCollection(ctx, c); err != nil {
			t.Fatal(err)
		}
	}
	ship := store.Page{
		ID: "page1", CollectionID: "db1", Title: "Ship", URL: "https://example.com/ship", Alive: true, Source: "test", SyncedAt: now,
		PropertiesJSON: `{"Name":{"type":"title","title":[{"plain_text":"Ship"}]}}`,
	}
	if err := st.UpsertPage(ctx, ship); err != nil {
		t.Fatal(err)
	}
	if err := st.Close(); err != nil {
		t.Fatal(err)
	}

	outDir := filepath.Join(tmp, "csv")
	args := []string{"--config", filepath.Join(tmp, "missing.toml"), "--db", dbPath, "export-db", "--all", "--dir", outDir}
	var stdout, stderr bytes.Buffer
	if err := run(ctx, args, &stdout, &stderr); err != nil {
		t.Fatalf("export-db --all failed: %v\nstderr:\n%s", err, stderr.String())
	}

	summary := stdout.String()
	if !strings.Contains(summary, "exported 2 databases and 1 rows") {
		t.Fatalf("unexpected stdout: %s", summary)
	}

	// The emoji-named collection is expected under an emoji-free filename.
	expectedFiles := []string{"roadmap-db1.csv", "launch-plan-db2.csv", "index.tsv"}
	for _, name := range expectedFiles {
		if _, err := os.Stat(filepath.Join(outDir, name)); err != nil {
			t.Fatalf("missing %s: %v", name, err)
		}
	}

	index, err := os.ReadFile(filepath.Join(outDir, "index.tsv"))
	if err != nil {
		t.Fatal(err)
	}
	expectedLines := []string{
		"id\tname\tsource\trows\tcolumns\tfile",
		"db1\tRoadmap\ttest\t1\t4\troadmap-db1.csv",
	}
	for _, want := range expectedLines {
		if strings.Contains(string(index), want) {
			continue
		}
		t.Fatalf("index missing %q:\n%s", want, index)
	}
}

View File

@ -10,6 +10,19 @@ go test ./...
go build ./cmd/notcrawl
```
Also smoke-test the crawlkit control and non-interactive TUI surfaces before tagging a release:
```bash
notcrawl metadata --json
notcrawl status --json
notcrawl doctor --json
notcrawl tui --json --limit 10
```
The CI workflow runs the same control-surface smoke checks, plus dependency
verification, `gofmt`, `go vet`, tests, a GoReleaser snapshot build, and
CodeQL.
If GoReleaser is installed:
```bash

34
go.mod
View File

@ -1,10 +1,32 @@
module github.com/vincentkoc/notcrawl
go 1.26.0
go 1.26.2
require modernc.org/sqlite v1.50.0
require (
github.com/pelletier/go-toml/v2 v2.2.4
modernc.org/sqlite v1.46.1
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/bubbles v1.0.0 // indirect
github.com/charmbracelet/bubbletea v1.3.10 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
github.com/charmbracelet/lipgloss v1.1.0 // indirect
github.com/charmbracelet/x/ansi v0.11.6 // indirect
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
github.com/clipperhouse/displaywidth v0.9.0 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/text v0.3.8 // indirect
)
require (
@ -13,9 +35,9 @@ require (
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
golang.org/x/sys v0.37.0 // indirect
modernc.org/libc v1.67.6 // indirect
github.com/vincentkoc/crawlkit v0.4.0
golang.org/x/sys v0.42.0 // indirect
modernc.org/libc v1.72.0 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
)

93
go.sum
View File

@ -1,44 +1,89 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY=
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/tools v0.38.0 h1:Hx2Xv8hISq8Lm16jvBZ2VQf+RLmbd7wVUsALibYI/IQ=
golang.org/x/tools v0.38.0/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
modernc.org/cc/v4 v4.27.1 h1:9W30zRlYrefrDV2JE2O8VDtJ1yPGownxciz5rrbQZis=
modernc.org/cc/v4 v4.27.1/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0=
modernc.org/ccgo/v4 v4.30.1 h1:4r4U1J6Fhj98NKfSjnPUN7Ze2c6MnAdL0hWw6+LrJpc=
modernc.org/ccgo/v4 v4.30.1/go.mod h1:bIOeI1JL54Utlxn+LwrFyjCx2n2RDiYEaJVSrgdrRfM=
modernc.org/fileutil v1.3.40 h1:ZGMswMNc9JOCrcrakF1HrvmergNLAmxOPjizirpfqBA=
modernc.org/fileutil v1.3.40/go.mod h1:HxmghZSZVAz/LXcMNwZPA/DRrQZEVP9VX0V4LQGQFOc=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito=
modernc.org/gc/v3 v3.1.1 h1:k8T3gkXWY9sEiytKhcgyiZ2L0DTyCQ/nvX+LoCljoRE=
modernc.org/gc/v3 v3.1.1/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
modernc.org/libc v1.67.6 h1:eVOQvpModVLKOdT+LvBPjdQqfrZq+pC39BygcT+E7OI=
modernc.org/libc v1.67.6/go.mod h1:JAhxUVlolfYDErnwiqaLvUqc8nfb2r6S6slAgZOnaiE=
modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
@ -47,8 +92,8 @@ modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
modernc.org/sqlite v1.46.1 h1:eFJ2ShBLIEnUWlLy12raN0Z1plqmFX9Qe3rjQTKt6sU=
modernc.org/sqlite v1.46.1/go.mod h1:CzbrU2lSB1DKUusvwGz7rqEKIq+NUd8GWuBBZDs9/nA=
modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM=
modernc.org/sqlite v1.50.0/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=

View File

@ -8,7 +8,7 @@ import (
"strings"
"time"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/vincentkoc/crawlkit/config"
)
const (
@ -49,12 +49,22 @@ type ShareConfig struct {
StaleAfter string `toml:"stale_after"`
}
var appConfig = crawlconfig.App{Name: "notcrawl", BaseDir: "~/" + defaultDirName, LegacyBaseDir: "~/" + defaultDirName}
func Default() Config {
base := filepath.ToSlash(filepath.Join("~", defaultDirName))
paths, err := appConfig.DefaultPaths()
if err != nil {
base := filepath.ToSlash(filepath.Join("~", defaultDirName))
paths = crawlconfig.Paths{
DBPath: filepath.ToSlash(filepath.Join(base, "notcrawl.db")),
CacheDir: filepath.ToSlash(filepath.Join(base, "cache")),
ShareDir: filepath.ToSlash(filepath.Join(base, "share")),
}
}
return Config{
DBPath: filepath.ToSlash(filepath.Join(base, "notcrawl.db")),
CacheDir: filepath.ToSlash(filepath.Join(base, "cache")),
MarkdownDir: filepath.ToSlash(filepath.Join(base, "pages")),
DBPath: filepath.ToSlash(paths.DBPath),
CacheDir: filepath.ToSlash(paths.CacheDir),
MarkdownDir: filepath.ToSlash(filepath.Join(paths.BaseDir, "pages")),
Notion: NotionConfig{
Desktop: DesktopConfig{Enabled: true, Path: ""},
API: APIConfig{
@ -66,18 +76,15 @@ func Default() Config {
},
Share: ShareConfig{
Branch: "main",
RepoPath: filepath.ToSlash(filepath.Join(base, "share")),
RepoPath: filepath.ToSlash(paths.ShareDir),
StaleAfter: "1h",
},
}
}
func DefaultPath() (string, error) {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
return filepath.Join(home, defaultDirName, "config.toml"), nil
paths, err := appConfig.DefaultPaths()
return paths.ConfigPath, err
}
func Load(path string) (Config, error) {
@ -93,8 +100,7 @@ func Load(path string) (Config, error) {
return Config{}, err
}
cfg := Default()
b, err := os.ReadFile(path)
if err != nil {
if err := crawlconfig.LoadTOML(path, &cfg); err != nil {
if errors.Is(err, os.ErrNotExist) {
if err := cfg.Resolve(); err != nil {
return Config{}, err
@ -103,9 +109,6 @@ func Load(path string) (Config, error) {
}
return Config{}, err
}
if err := toml.Unmarshal(b, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Resolve(); err != nil {
return Config{}, err
}
@ -133,11 +136,7 @@ func WriteStarter(path string) (string, error) {
return "", err
}
cfg := Default()
b, err := toml.Marshal(cfg)
if err != nil {
return "", err
}
return path, os.WriteFile(path, b, 0o600)
return path, crawlconfig.WriteTOML(path, cfg, 0o600)
}
func (c *Config) Resolve() error {
@ -177,17 +176,7 @@ func ExpandPath(path string) (string, error) {
if path == "" {
return "", nil
}
if path == "~" || strings.HasPrefix(path, "~/") {
home, err := os.UserHomeDir()
if err != nil {
return "", err
}
if path == "~" {
return home, nil
}
return filepath.Join(home, path[2:]), nil
}
return filepath.Abs(path)
return filepath.Abs(crawlconfig.ExpandHome(path))
}
func (c Config) APIToken() string {

View File

@ -79,7 +79,7 @@ func TestExporterUsesDisplayOrder(t *testing.T) {
}
}
func TestExporterPreservesUnicodePathNames(t *testing.T) {
func TestExporterRemovesEmojiFromPathNames(t *testing.T) {
ctx := context.Background()
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {
@ -99,7 +99,7 @@ func TestExporterPreservesUnicodePathNames(t *testing.T) {
if err != nil {
t.Fatal(err)
}
want := filepath.Join(dir, "研究-🚀", "計画-✅-q2-page1.md")
want := filepath.Join(dir, "研究", "計画-q2-page1.md")
if len(s.Files) != 1 || s.Files[0] != want {
t.Fatalf("unexpected export path: %+v, want %s", s.Files, want)
}

View File

@ -17,6 +17,8 @@ import (
const SourceName = "api"
const maxAPIAttempts = 4
type Client struct {
BaseURL string
Version string
@ -448,56 +450,50 @@ func (c Client) ingestComments(ctx context.Context, st *store.Store, pageID, spa
}
func (c Client) do(ctx context.Context, method, path string, body any, out any) error {
var reader io.Reader
var bodyBytes []byte
if body != nil {
b, err := json.Marshal(body)
if err != nil {
return err
}
reader = bytes.NewReader(b)
bodyBytes = b
}
req, err := http.NewRequestWithContext(ctx, method, strings.TrimRight(c.BaseURL, "/")+path, reader)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+c.Token)
req.Header.Set("Notion-Version", c.Version)
req.Header.Set("Accept", "application/json")
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := c.HTTP.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusTooManyRequests {
if wait, err := time.ParseDuration(resp.Header.Get("Retry-After") + "s"); err == nil && wait > 0 {
timer := time.NewTimer(wait)
select {
case <-ctx.Done():
timer.Stop()
return ctx.Err()
case <-timer.C:
}
return c.do(ctx, method, path, body, out)
for attempt := 1; attempt <= maxAPIAttempts; attempt++ {
var reader io.Reader
if bodyBytes != nil {
reader = bytes.NewReader(bodyBytes)
}
}
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
req, err := http.NewRequestWithContext(ctx, method, strings.TrimRight(c.BaseURL, "/")+path, reader)
if err != nil {
return err
}
req.Header.Set("Authorization", "Bearer "+c.Token)
req.Header.Set("Notion-Version", c.Version)
req.Header.Set("Accept", "application/json")
if body != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := c.HTTP.Do(req)
if err != nil {
return err
}
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
defer resp.Body.Close()
return json.NewDecoder(resp.Body).Decode(out)
}
b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
bodyText := strings.TrimSpace(string(b))
apiErr := notionAPIError{Method: method, Path: path, Status: resp.Status, StatusCode: resp.StatusCode, Body: bodyText}
var payload struct {
Code string `json:"code"`
Message string `json:"message"`
}
if err := json.Unmarshal(b, &payload); err == nil {
apiErr.Code = payload.Code
apiErr.Message = payload.Message
resp.Body.Close()
apiErr := apiErrorFromResponse(method, path, resp, b)
if attempt < maxAPIAttempts && shouldRetry(apiErr) {
if err := waitBeforeRetry(ctx, apiErr.RetryAfter); err != nil {
return err
}
continue
}
return apiErr
}
return json.NewDecoder(resp.Body).Decode(out)
return nil
}
type notionAPIError struct {
@ -508,6 +504,8 @@ type notionAPIError struct {
Code string
Message string
Body string
RetryAfter time.Duration
Retryable bool
}
func (e notionAPIError) Error() string {
@ -517,6 +515,76 @@ func (e notionAPIError) Error() string {
return fmt.Sprintf("notion api %s %s: %s: %s", e.Method, e.Path, e.Status, e.Body)
}
// apiErrorFromResponse builds a notionAPIError from resp and its already-read
// body. Status fields and the trimmed body text are always populated; Code,
// Message and Retryable are filled in only when body decodes as a JSON object
// of the expected shape. RetryAfter comes from the Retry-After header or the
// body's retry_after field.
func apiErrorFromResponse(method, path string, resp *http.Response, body []byte) notionAPIError {
	var decoded struct {
		Code       string  `json:"code"`
		Message    string  `json:"message"`
		Retryable  bool    `json:"retryable"`
		RetryAfter float64 `json:"retry_after"`
	}
	result := notionAPIError{
		Method:     method,
		Path:       path,
		Status:     resp.Status,
		StatusCode: resp.StatusCode,
		Body:       strings.TrimSpace(string(body)),
		RetryAfter: retryAfter(resp.Header.Get("Retry-After"), body),
	}
	if json.Unmarshal(body, &decoded) != nil {
		// Not a decodable error payload: keep only the transport-level details.
		return result
	}
	result.Code = decoded.Code
	result.Message = decoded.Message
	result.Retryable = decoded.Retryable
	if result.RetryAfter == 0 && decoded.RetryAfter > 0 {
		result.RetryAfter = time.Duration(decoded.RetryAfter * float64(time.Second))
	}
	return result
}
// shouldRetry reports whether a failed request is worth repeating: either the
// error payload was flagged retryable, or the status is 429, 502, 503 or 504.
func shouldRetry(err notionAPIError) bool {
	if err.Retryable {
		return true
	}
	switch err.StatusCode {
	case http.StatusTooManyRequests,
		http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		return true
	default:
		return false
	}
}
// retryAfter returns how long to wait before retrying a request.
//
// The Retry-After header value is consulted first: a plain number is read as
// seconds, otherwise the value is parsed as an HTTP date and the time remaining
// until that date is used. If the header yields no positive wait, the JSON
// body's "retry_after" field (seconds, may be fractional) is used. It returns 0
// when no positive wait can be determined.
func retryAfter(header string, body []byte) time.Duration {
	if value := strings.TrimSpace(header); value != "" {
		// Only plain numeric values count as delay-seconds. Blindly appending
		// "s" would let unit-suffixed input slip through ParseDuration with the
		// wrong meaning: "1m" would become "1ms" and "1h30" would become 1h30s.
		if strings.Trim(value, "0123456789.") == "" {
			if wait, err := time.ParseDuration(value + "s"); err == nil && wait > 0 {
				return wait
			}
		}
		if when, err := http.ParseTime(value); err == nil {
			if wait := time.Until(when); wait > 0 {
				return wait
			}
		}
	}
	var payload struct {
		RetryAfter float64 `json:"retry_after"`
	}
	if err := json.Unmarshal(body, &payload); err == nil && payload.RetryAfter > 0 {
		return time.Duration(payload.RetryAfter * float64(time.Second))
	}
	return 0
}
// waitBeforeRetry blocks for wait, returning early with ctx.Err() if ctx is
// done first. A non-positive wait returns nil immediately without consulting
// ctx.
func waitBeforeRetry(ctx context.Context, wait time.Duration) error {
	if wait > 0 {
		timer := time.NewTimer(wait)
		defer timer.Stop()
		select {
		case <-timer.C:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}
func isIgnoredCommentError(err error) bool {
apiErr, ok := err.(notionAPIError)
if !ok {

View File

@ -32,7 +32,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
"results":[{
"object":"database",
"id":"db1",
"title":[{"plain_text":"Roadmap"}],
"title":[{"type":"text","plain_text":"Roadmap","text":{"content":"Roadmap"}}],
"parent":{"type":"workspace","workspace":true},
"properties":{
"Name":{"id":"title","type":"title","title":{}},
@ -57,7 +57,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
"url":"https://notion.so/page1",
"parent":{"type":"database_id","database_id":"db1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Name":{"id":"title","type":"title","title":[{"type":"text","plain_text":"Ship","text":{"content":"Ship"}}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
@ -93,7 +93,7 @@ func TestSyncIngestsDatabasesAndRows(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "db1" {
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "db1" || rows[0].Title != "Ship" {
t.Fatalf("unexpected rows: %+v", rows)
}
}
@ -122,7 +122,7 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
"results":[{
"object":"data_source",
"id":"ds1",
"title":[{"plain_text":"Roadmap"}],
"title":[{"type":"text","plain_text":"Roadmap","text":{"content":"Roadmap"}}],
"parent":{"type":"database_id","database_id":"db1"},
"database_parent":{"type":"page_id","page_id":"page-parent"},
"properties":{
@ -147,7 +147,7 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
"url":"https://notion.so/page1",
"parent":{"type":"data_source_id","data_source_id":"ds1"},
"properties":{
"Name":{"id":"title","type":"title","title":[{"plain_text":"Ship"}]},
"Name":{"id":"title","type":"title","title":[{"type":"text","plain_text":"Ship","text":{"content":"Ship"}}]},
"Status":{"id":"status","type":"select","select":{"name":"Done"}}
}
}],
@ -176,14 +176,14 @@ func TestSyncIngestsCurrentDataSourcesAndRows(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if len(collections) != 1 || collections[0].ID != "ds1" || collections[0].ParentID != "db1" {
if len(collections) != 1 || collections[0].ID != "ds1" || collections[0].ParentID != "db1" || collections[0].Name != "Roadmap" {
t.Fatalf("unexpected collections: %+v", collections)
}
rows, err := st.CollectionPages(context.Background(), "ds1")
if err != nil {
t.Fatal(err)
}
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "ds1" {
if len(rows) != 1 || rows[0].ID != "page1" || rows[0].CollectionID != "ds1" || rows[0].Title != "Ship" {
t.Fatalf("unexpected rows: %+v", rows)
}
}
@ -213,3 +213,52 @@ func TestIngestCommentsSkipsRestrictedResource(t *testing.T) {
t.Fatalf("unexpected comment count: %d", count)
}
}
// TestIngestCommentsRetriesTransientGatewayError serves a 502 response on the
// first /comments request and a one-comment list on the second, then checks
// that ingestComments succeeds after exactly two attempts and that the comment
// text is stored for the page.
func TestIngestCommentsRetriesTransientGatewayError(t *testing.T) {
attempts := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
// NOTE(review): this callback is invoked by the httptest server, not directly
// by the test function; the testing package documents that Fatalf/FailNow must
// be called from the goroutine running the test — consider t.Errorf plus an
// error response here.
if r.URL.Path != "/comments" {
t.Fatalf("unexpected request: %s %s", r.Method, r.URL.String())
}
attempts++
if attempts == 1 {
// First attempt: a gateway error whose body marks it retryable, with a
// retry_after of 0.
w.WriteHeader(http.StatusBadGateway)
_, _ = w.Write([]byte(`{"retryable":true,"retry_after":0}`))
return
}
// Later attempts: a successful single-page comment list.
_, _ = w.Write([]byte(`{
"object":"list",
"results":[{
"id":"comment1",
"rich_text":[{"type":"text","plain_text":"Looks good","text":{"content":"Looks good"}}],
"created_by":{"id":"user1"},
"created_time":"2026-01-01T00:00:00Z",
"last_edited_time":"2026-01-01T00:00:00Z"
}],
"has_more":false
}`))
}))
defer server.Close()
st, err := store.Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {
t.Fatal(err)
}
defer st.Close()
count, err := (Client{BaseURL: server.URL, Version: "2026-03-11", Token: "secret", HTTP: http.DefaultClient}).ingestComments(context.Background(), st, "page1", "space1")
if err != nil {
t.Fatal(err)
}
// One comment ingested, and the server must have been hit exactly twice
// (the failed attempt plus the successful retry).
if count != 1 || attempts != 2 {
t.Fatalf("unexpected count/attempts: count=%d attempts=%d", count, attempts)
}
comments, err := st.PageComments(context.Background(), "page1")
if err != nil {
t.Fatal(err)
}
if len(comments) != 1 || comments[0].Text != "Looks good" {
t.Fatalf("unexpected comments: %+v", comments)
}
}

View File

@ -8,12 +8,39 @@ import (
"unicode"
)
var spaceRE = regexp.MustCompile(`\s+`)
var (
spaceRE = regexp.MustCompile(`\s+`)
legacyInlineLinkArtifactRE = regexp.MustCompile(`\ba\s+((?:https?://|/)[^\s]+)`)
legacyInlineMarkArtifactRE = regexp.MustCompile(`\s+\b[bius]\b($|[\s,.;:])`)
legacyMentionArtifactRE = regexp.MustCompile(`\bm\s+[0-9a-fA-F]{8}-[0-9a-fA-F-]{8,}(?:\s+[0-9a-fA-F-]{12,})?`)
legacyPageMentionRE = regexp.MustCompile(`(?:‣\s*)?p\s+[0-9a-fA-F]{8}-[0-9a-fA-F-]{8,}(?:\s+[0-9a-fA-F-]{12,})?`)
legacyLinkedMentionRE = regexp.MustCompile(`‣\s+lm\s+`)
legacyBareMentionRE = regexp.MustCompile(`‣\s+[0-9a-fA-F]{8}-[0-9a-fA-F-]{8,}`)
spaceBeforePunctuationRE = regexp.MustCompile(`\s+([,.;:])`)
repeatedCommaRE = regexp.MustCompile(`(?:,\s*){2,}`)
repeatedLinkedPageRE = regexp.MustCompile(`linked page\b(?:,\s*linked page\b)+`)
)
// Normalize collapses every run of whitespace matched by spaceRE into a single
// space and trims leading and trailing whitespace from the result.
func Normalize(s string) string {
	collapsed := spaceRE.ReplaceAllString(s, " ")
	return strings.TrimSpace(collapsed)
}
// CleanLegacyArtifacts rewrites leftover legacy markup tokens in s into
// readable text and returns the normalized result.
//
// The first pass applies the legacy* patterns in a fixed order: inline link
// artifacts become "<url>", stray single-letter mark artifacts are dropped,
// and mention artifacts become "@mention", "linked page" or a bare "‣ ". The
// second pass tidies what is left: repeated commas, runs of "linked page",
// whitespace before punctuation, and a dangling " and, ".
func CleanLegacyArtifacts(s string) string {
	// Order matters: later patterns rely on what earlier ones have already
	// consumed.
	rewrites := []struct {
		re   *regexp.Regexp
		repl string
	}{
		{legacyInlineLinkArtifactRE, "<$1>"},
		{legacyInlineMarkArtifactRE, "$1"},
		{legacyMentionArtifactRE, "@mention"},
		{legacyPageMentionRE, "linked page"},
		{legacyLinkedMentionRE, "‣ "},
		{legacyBareMentionRE, "@mention"},
	}
	for _, step := range rewrites {
		s = step.re.ReplaceAllString(s, step.repl)
	}

	cleaned := Normalize(s)
	cleaned = repeatedCommaRE.ReplaceAllString(cleaned, ", ")
	cleaned = repeatedLinkedPageRE.ReplaceAllString(cleaned, "linked pages")
	cleaned = strings.ReplaceAll(cleaned, "linked pagess", "linked pages")
	cleaned = spaceBeforePunctuationRE.ReplaceAllString(cleaned, "$1")
	cleaned = strings.ReplaceAll(cleaned, " and, ", ", ")
	return Normalize(cleaned)
}
func PlainFromJSON(raw string) string {
if strings.TrimSpace(raw) == "" {
return ""
@ -103,7 +130,7 @@ func Slug(s string) string {
}
func isSlugRune(r rune) bool {
return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsMark(r) || (r > unicode.MaxASCII && unicode.IsSymbol(r)) || r == '\u200d'
return unicode.IsLetter(r) || unicode.IsNumber(r)
}
func isSlugSeparator(r rune) bool {
@ -127,23 +154,88 @@ func walk(v any, parts *[]string) {
*parts = append(*parts, x)
}
case []any:
if text, ok := legacyRichTextPart(x); ok {
*parts = append(*parts, text)
return
}
for _, item := range x {
walk(item, parts)
}
case map[string]any:
for _, key := range []string{"plain_text", "content", "text", "name", "title"} {
if text, ok := normalizedString(x["plain_text"]); ok {
*parts = append(*parts, text)
return
}
if text, ok := richTextContent(x["text"]); ok {
*parts = append(*parts, text)
return
}
if text, ok := normalizedString(x["content"]); ok {
*parts = append(*parts, text)
return
}
for _, key := range []string{"name", "title", "rich_text", "text"} {
if value, ok := x[key]; ok {
walk(value, parts)
}
}
if rt, ok := x["rich_text"]; ok {
walk(rt, parts)
}
if title, ok := x["title"]; ok {
walk(title, parts)
}
if text, ok := x["text"].(map[string]any); ok {
walk(text["content"], parts)
}
}
}
// legacyRichTextPart interprets values as a legacy rich-text segment whose
// first element is the text and whose optional second element is a list of
// annotations. It returns the normalized text, followed by " <link>" when a
// link annotation is present. The boolean is false when values is empty or its
// first element is not a non-blank string.
func legacyRichTextPart(values []any) (string, bool) {
	if len(values) == 0 {
		return "", false
	}
	text, ok := normalizedString(values[0])
	switch {
	case !ok:
		return "", false
	case len(values) == 1:
		return text, true
	}
	if link := legacyAnnotationLink(values[1]); link != "" {
		return Normalize(text + " <" + link + ">"), true
	}
	return text, true
}
// legacyAnnotationLink scans a legacy annotation list for the first entry of
// the form ["a", <link>] with a non-blank string link and returns that link
// normalized. It returns "" when value is not a list or no such entry exists.
func legacyAnnotationLink(value any) string {
	// A failed assertion leaves annotations nil, so the loop is simply skipped.
	annotations, _ := value.([]any)
	for _, entry := range annotations {
		pair, isList := entry.([]any)
		if !isList || len(pair) < 2 {
			continue
		}
		if code, _ := pair[0].(string); code != "a" {
			continue
		}
		if link, found := normalizedString(pair[1]); found {
			return link
		}
	}
	return ""
}
// richTextContent returns the normalized "content" string of v when v is a
// JSON object; the boolean is false if v is not an object or the content is
// missing, not a string, or blank.
func richTextContent(v any) (string, bool) {
	if obj, isObject := v.(map[string]any); isObject {
		return normalizedString(obj["content"])
	}
	return "", false
}
// normalizedString returns v passed through Normalize when v is a string that
// is still non-empty afterwards; otherwise it returns "" and false.
func normalizedString(v any) (string, bool) {
	if raw, isString := v.(string); isString {
		if cleaned := Normalize(raw); cleaned != "" {
			return cleaned, true
		}
	}
	return "", false
}

View File

@ -9,6 +9,90 @@ func TestTitleFromProperties(t *testing.T) {
}
}
// TestTitleFromPropertiesPrefersNotionRichTextOnce feeds TitleFromProperties a
// title property whose rich-text item carries the same text in both
// "plain_text" and "text.content", and expects the title to appear once.
func TestTitleFromPropertiesPrefersNotionRichTextOnce(t *testing.T) {
got := TitleFromProperties(`{
"Name": {
"id": "title",
"type": "title",
"title": [{
"type": "text",
"plain_text": "OpenClaw",
"text": {"content": "OpenClaw"}
}]
}
}`)
// A duplicated result would indicate both fields were collected.
if got != "OpenClaw" {
t.Fatalf("got %q", got)
}
}
// TestPlainPrefersNotionRichTextPlainTextOnce checks that a rich-text item
// carrying both "plain_text" and "text.content" contributes its text once.
func TestPlainPrefersNotionRichTextPlainTextOnce(t *testing.T) {
	item := map[string]any{
		"type":       "text",
		"plain_text": "OpenClaw",
		"text":       map[string]any{"content": "OpenClaw"},
	}
	if got := Plain([]any{item}); got != "OpenClaw" {
		t.Fatalf("got %q", got)
	}
}
// TestPlainFallsBackToNotionTextContentOnce checks that a rich-text item
// without "plain_text" yields its "text.content" exactly once.
func TestPlainFallsBackToNotionTextContentOnce(t *testing.T) {
	item := map[string]any{
		"type": "text",
		"text": map[string]any{"content": "OpenClaw"},
	}
	if got := Plain([]any{item}); got != "OpenClaw" {
		t.Fatalf("got %q", got)
	}
}
// TestPlainHandlesLegacyNotionAnnotations checks that legacy [text,
// annotations] segments render their text, append link annotations as
// "<url>", and drop non-link annotation codes.
func TestPlainHandlesLegacyNotionAnnotations(t *testing.T) {
	const want = "Marketing Customer Reference Rights <https://example.com/sheet> Product Marketing"
	if got := PlainFromJSON(`{"title":[["Marketing Customer Reference Rights",[["a","https://example.com/sheet"]]],[" "],["Product Marketing",[["b"]]]]}`); got != want {
		t.Fatalf("got %q", got)
	}
}
// TestCleanLegacyArtifacts covers a stray mark code, an inline link artifact,
// and a mention artifact in a single multi-line input.
func TestCleanLegacyArtifacts(t *testing.T) {
	const want = "Option A: Marketing Customer Reference Rights <https://example.com/sheet> @mention It works"
	if got := CleanLegacyArtifacts("Option A: b\nMarketing Customer Reference Rights a https://example.com/sheet\nm 35171240-10a3-80ff-95be-001c31559035 It works"); got != want {
		t.Fatalf("got %q", got)
	}
}
// TestCleanLegacyArtifactsRemovesMentionOpcodes checks the expected rewrite of
// "‣"-prefixed reference residue: a bare id becomes "@mention", a "p" page
// reference becomes "linked page", the run of commas collapses to one, and
// the "lm" code is removed while its label is kept.
func TestCleanLegacyArtifactsRemovesMentionOpcodes(t *testing.T) {
	const (
		input = "reach out to ‣ 1b1d872b-594c-811a-ad82-0002ea4fc797 and ‣ p 24d71240-10a3-80ae-8bde-d59bf00682c0 00b8cbcf-c520-4790-999a-9c2940263721,,, see ‣ lm Weekly Walk"
		want  = "reach out to @mention and linked page, see ‣ Weekly Walk"
	)
	if cleaned := CleanLegacyArtifacts(input); cleaned != want {
		t.Fatalf("got %q", cleaned)
	}
}
// TestCleanLegacyArtifactsCompactsRepeatedLinkedPages checks that three
// consecutive comma-separated "‣ p" page references are expected to collapse
// into the single phrase "linked pages".
func TestCleanLegacyArtifactsCompactsRepeatedLinkedPages(t *testing.T) {
	const (
		input = "ask ‣ p 24d71240-10a3-80ae-8bde-d59bf00682c0 00b8cbcf-c520-4790-999a-9c2940263721, ‣ p 24d71240-10a3-80d3-a3b0-c06884bad333 00b8cbcf-c520-4790-999a-9c2940263721, ‣ p 1de71240-10a3-809a-98f9-ea6f4d8702b3 00b8cbcf-c520-4790-999a-9c2940263721 Add notes"
		want  = "ask linked pages Add notes"
	)
	if cleaned := CleanLegacyArtifacts(input); cleaned != want {
		t.Fatalf("got %q", cleaned)
	}
}
// TestPlainWalksTitleOnlyOnce wraps a rich-text item in a "title" property and
// expects Plain to return "Roadmap" once, even though the item repeats the
// text under both "plain_text" and "text.content".
func TestPlainWalksTitleOnlyOnce(t *testing.T) {
	titleItem := map[string]any{
		"plain_text": "Roadmap",
		"text":       map[string]any{"content": "Roadmap"},
	}
	property := map[string]any{"title": []any{titleItem}}
	if plain := Plain(property); plain != "Roadmap" {
		t.Fatalf("got %q", plain)
	}
}
func TestSlug(t *testing.T) {
got := Slug("Launch Plan / Q2")
if got != "launch-plan-q2" {
@ -16,9 +100,9 @@ func TestSlug(t *testing.T) {
}
}
func TestSlugPreservesUnicodePathText(t *testing.T) {
func TestSlugRemovesEmojiPathText(t *testing.T) {
got := Slug("研究 🚀 / 計画 ✅")
if got != "研究-🚀-計画-✅" {
if got != "研究-計画" {
t.Fatalf("got %q", got)
}
}
@ -30,6 +114,13 @@ func TestSlugRemovesUnsafePathText(t *testing.T) {
}
}
// TestSlugRemovesEmojiVariationSelectors expects Slug to drop a leading emoji
// written with a variation selector and slugify only the remaining words.
func TestSlugRemovesEmojiVariationSelectors(t *testing.T) {
	const (
		input = "⚠️ Production Incident Guide"
		want  = "production-incident-guide"
	)
	if slug := Slug(input); slug != want {
		t.Fatalf("got %q", slug)
	}
}
func TestShortIDKeepsEnoughEntropyForDesktopIDs(t *testing.T) {
got := ShortID("24f71240-0000-0000-0000-123456789abc")
if got != "24f71240-56789abc" {

View File

@ -17,6 +17,7 @@ import (
"syscall"
"time"
"github.com/vincentkoc/crawlkit/mirror"
"github.com/vincentkoc/notcrawl/internal/store"
)
@ -117,22 +118,14 @@ func Publish(ctx context.Context, st *store.Store, opts PublishOptions) (Publish
}
s := PublishSummary{Manifest: manifest}
if opts.Commit {
if err := runGit(ctx, opts.RepoPath, "add", "manifest.json", "data", "pages"); err != nil {
return s, err
}
dirty, err := hasChanges(ctx, opts.RepoPath)
committed, err := commitGenerated(ctx, opts.RepoPath, opts.Message)
if err != nil {
return s, err
}
if dirty {
if err := runGit(ctx, opts.RepoPath, "commit", "-m", opts.Message); err != nil {
return s, err
}
s.Committed = true
}
s.Committed = committed
}
if opts.Push {
if err := runGit(ctx, opts.RepoPath, "push", "-u", "origin", opts.Branch); err != nil {
if err := mirror.Push(ctx, mirror.Options{RepoPath: opts.RepoPath, Remote: opts.Remote, Branch: opts.Branch}); err != nil {
return s, err
}
s.Pushed = true
@ -167,28 +160,17 @@ func Subscribe(ctx context.Context, st *store.Store, remote, repoPath, branch st
if branch == "" {
branch = "main"
}
if _, err := os.Stat(filepath.Join(repoPath, ".git")); os.IsNotExist(err) {
if err := os.MkdirAll(filepath.Dir(repoPath), 0o755); err != nil {
return Manifest{}, err
}
if err := run(ctx, "", "git", "clone", "--branch", branch, remote, repoPath); err != nil {
return Manifest{}, err
}
} else if err == nil {
if err := runGit(ctx, repoPath, "pull", "--ff-only", "origin", branch); err != nil {
return Manifest{}, err
}
} else {
if err := mirror.Pull(ctx, mirror.Options{RepoPath: repoPath, Remote: remote, Branch: branch}); err != nil {
return Manifest{}, err
}
return Import(ctx, st, repoPath)
}
func Update(ctx context.Context, st *store.Store, repoPath, branch string) (Manifest, error) {
func Update(ctx context.Context, st *store.Store, remote, repoPath, branch string) (Manifest, error) {
if branch == "" {
branch = "main"
}
if err := runGit(ctx, repoPath, "pull", "--ff-only", "origin", branch); err != nil {
if err := pullForUpdate(ctx, repoPath, remote, branch); err != nil {
return Manifest{}, err
}
return Import(ctx, st, repoPath)
@ -286,35 +268,72 @@ func importTable(ctx context.Context, db *sql.DB, path, table string) error {
}
func ensureRepo(ctx context.Context, repoPath, remote, branch string) error {
if err := os.MkdirAll(repoPath, 0o755); err != nil {
if err := mirror.EnsureRepo(ctx, mirror.Options{RepoPath: repoPath, Remote: remote, Branch: branch}); err != nil {
return err
}
if _, err := os.Stat(filepath.Join(repoPath, ".git")); os.IsNotExist(err) {
if err := runGit(ctx, repoPath, "init", "-b", branch); err != nil {
return err
}
} else if err != nil {
return err
remote = strings.TrimSpace(remote)
if remote == "" {
return nil
}
if remote != "" {
if err := runGit(ctx, repoPath, "remote", "get-url", "origin"); err != nil {
if err := runGit(ctx, repoPath, "remote", "add", "origin", remote); err != nil {
return err
}
} else if err := runGit(ctx, repoPath, "remote", "set-url", "origin", remote); err != nil {
return err
if err := runGit(ctx, repoPath, "remote", "set-url", "origin", remote); err != nil {
if strings.Contains(err.Error(), "No such remote") {
return runGit(ctx, repoPath, "remote", "add", "origin", remote)
}
return err
}
return nil
}
func hasChanges(ctx context.Context, repoPath string) (bool, error) {
cmd := exec.CommandContext(ctx, "git", "-C", repoPath, "status", "--porcelain")
out, err := cmd.Output()
return mirror.Dirty(ctx, mirror.Options{RepoPath: repoPath})
}
// pullForUpdate refreshes the checkout at repoPath before an import. When
// remote is non-blank it delegates to mirror.Pull. Otherwise it calls
// ensureRepo with an empty remote and then runs
// `git pull --ff-only origin <branch>` in the checkout.
func pullForUpdate(ctx context.Context, repoPath, remote, branch string) error {
	if hasRemote := strings.TrimSpace(remote) != ""; hasRemote {
		opts := mirror.Options{RepoPath: repoPath, Remote: remote, Branch: branch}
		return mirror.Pull(ctx, opts)
	}
	err := ensureRepo(ctx, repoPath, "", branch)
	if err != nil {
		return err
	}
	return runGit(ctx, repoPath, "pull", "--ff-only", "origin", branch)
}
func commitGenerated(ctx context.Context, repoPath, message string) (bool, error) {
if message == "" {
message = "archive: notcrawl snapshot"
}
if err := runGit(ctx, repoPath, "add", "--", "manifest.json", "data", "pages"); err != nil {
return false, err
}
staged, err := hasStagedGeneratedChanges(ctx, repoPath)
if err != nil {
return false, err
}
return strings.TrimSpace(string(out)) != "", nil
if !staged {
return false, nil
}
if err := runGit(ctx, repoPath,
"-c", "commit.gpgsign=false",
"-c", "user.name=crawlkit",
"-c", "user.email=crawlkit@example.invalid",
"commit", "-m", message, "--", "manifest.json", "data", "pages",
); err != nil {
return false, err
}
return true, nil
}
// hasStagedGeneratedChanges reports whether the index of the repository at
// repoPath holds staged changes under manifest.json, data, or pages.
//
// It runs `git diff --cached --quiet --exit-code` limited to those paths. A
// clean exit means nothing is staged and exit status 1 means changes are
// staged. Any other failure is returned wrapped, followed by git's trimmed
// combined output.
func hasStagedGeneratedChanges(ctx context.Context, repoPath string) (bool, error) {
	args := []string{
		"-C", repoPath,
		"diff", "--cached", "--quiet", "--exit-code",
		"--", "manifest.json", "data", "pages",
	}
	output, runErr := exec.CommandContext(ctx, "git", args...).CombinedOutput()
	if runErr == nil {
		return false, nil
	}
	var exitErr *exec.ExitError
	isDiffStatus := errors.As(runErr, &exitErr) && exitErr.ExitCode() == 1
	if !isDiffStatus {
		return false, fmt.Errorf("git diff --cached: %w\n%s", runErr, strings.TrimSpace(string(output)))
	}
	return true, nil
}
func runGit(ctx context.Context, dir string, args ...string) error {

View File

@ -3,7 +3,9 @@ package share
import (
"context"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"github.com/vincentkoc/notcrawl/internal/markdown"
@ -84,3 +86,153 @@ func TestPublishAndImportSnapshot(t *testing.T) {
t.Fatalf("expected imported search result, got %d", len(results))
}
}
// TestEnsureRepoUpdatesExistingOrigin initialises a repository whose "origin"
// already points at a stale URL, calls ensureRepo with a different remote, and
// expects `git remote get-url origin` to report the new URL afterwards.
func TestEnsureRepoUpdatesExistingOrigin(t *testing.T) {
	const (
		stale  = "https://example.invalid/old.git"
		remote = "https://example.invalid/fresh.git"
	)
	repo := filepath.Join(t.TempDir(), "repo")
	if err := os.MkdirAll(repo, 0o755); err != nil {
		t.Fatal(err)
	}
	runGitForTest(t, repo, "init")
	runGitForTest(t, repo, "remote", "add", "origin", stale)

	if err := ensureRepo(context.Background(), repo, remote, "main"); err != nil {
		t.Fatal(err)
	}

	origin := gitOutputForTest(t, repo, "remote", "get-url", "origin")
	if strings.TrimSpace(origin) != remote {
		t.Fatalf("origin = %q", origin)
	}
}
// TestPublishCommitsOnlyGeneratedSnapshotFiles checks that Publish with
// Commit set records a commit, leaves an unrelated tracked edit unstaged, and
// keeps that unrelated file out of the new commit.
func TestPublishCommitsOnlyGeneratedSnapshotFiles(t *testing.T) {
ctx := context.Background()
repo := filepath.Join(t.TempDir(), "repo")
if err := os.MkdirAll(repo, 0o755); err != nil {
t.Fatal(err)
}
runGitForTest(t, repo, "init", "-b", "main")
// Seed the repository with a committed notes.txt that is unrelated to the
// generated snapshot output.
notes := filepath.Join(repo, "notes.txt")
if err := os.WriteFile(notes, []byte("tracked\n"), 0o644); err != nil {
t.Fatal(err)
}
runGitForTest(t, repo, "add", "notes.txt")
// Identity and signing are supplied inline so the seed commit does not
// depend on the surrounding git configuration.
runGitForTest(t, repo,
"-c", "commit.gpgsign=false",
"-c", "user.name=test",
"-c", "user.email=test@example.invalid",
"commit", "-m", "seed notes",
)
// Modify the tracked file without staging it; this is the local edit the
// publish step must not pick up.
if err := os.WriteFile(notes, []byte("local edit\n"), 0o644); err != nil {
t.Fatal(err)
}
src, mdDir := snapshotStoreForTest(t, ctx, "Launch", "hello generated")
defer src.Close()
s, err := Publish(ctx, src, PublishOptions{RepoPath: repo, MarkdownDir: mdDir, Commit: true})
if err != nil {
t.Fatal(err)
}
if !s.Committed {
t.Fatal("expected generated snapshot commit")
}
// " M" in short status means modified in the working tree but not staged.
status := gitOutputForTest(t, repo, "status", "--short", "--", "notes.txt")
if !strings.HasPrefix(status, " M notes.txt") {
t.Fatalf("expected unrelated tracked edit to remain unstaged, got %q", status)
}
// List only the file names touched by HEAD; notes.txt must not be among them.
committed := gitOutputForTest(t, repo, "show", "--name-only", "--format=", "HEAD")
if strings.Contains(committed, "notes.txt") {
t.Fatalf("unexpected unrelated file in snapshot commit:\n%s", committed)
}
}
// TestUpdatePullsExistingOriginWhenRemoteNotConfigured checks that Update,
// called with an empty remote, still pulls from a clone's existing "origin"
// and imports the snapshot that was pushed after the clone was made.
func TestUpdatePullsExistingOriginWhenRemoteNotConfigured(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
// A bare repository in the temp dir stands in for the shared remote.
remote := filepath.Join(dir, "remote.git")
runGitForTest(t, dir, "init", "--bare", remote)
seed := filepath.Join(dir, "seed")
if err := os.MkdirAll(seed, 0o755); err != nil {
t.Fatal(err)
}
runGitForTest(t, seed, "init", "-b", "main")
// Publish and push an initial "Old" snapshot from the seed repository.
src, mdDir := snapshotStoreForTest(t, ctx, "Old", "old snapshot")
if _, err := Publish(ctx, src, PublishOptions{RepoPath: seed, MarkdownDir: mdDir, Commit: true}); err != nil {
t.Fatal(err)
}
if err := src.Close(); err != nil {
t.Fatal(err)
}
runGitForTest(t, seed, "remote", "add", "origin", remote)
runGitForTest(t, seed, "push", "-u", "origin", "main")
// Clone while the remote only holds the old snapshot; the clone's origin is
// set by git, not by the code under test.
local := filepath.Join(dir, "local")
runGitForTest(t, dir, "clone", remote, local)
// Push a newer "Fresh" snapshot so the clone is now behind the remote.
fresh, freshMD := snapshotStoreForTest(t, ctx, "Fresh", "fresh snapshot")
if _, err := Publish(ctx, fresh, PublishOptions{RepoPath: seed, Remote: remote, MarkdownDir: freshMD, Commit: true, Push: true}); err != nil {
t.Fatal(err)
}
if err := fresh.Close(); err != nil {
t.Fatal(err)
}
dst, err := store.Open(filepath.Join(dir, "dst.db"))
if err != nil {
t.Fatal(err)
}
defer dst.Close()
// The empty remote argument is the case under test.
if _, err := Update(ctx, dst, "", local, "main"); err != nil {
t.Fatal(err)
}
results, err := dst.Search(ctx, "fresh", 10)
if err != nil {
t.Fatal(err)
}
if len(results) != 1 || results[0].Title != "Fresh" {
t.Fatalf("expected fresh pulled snapshot, got %#v", results)
}
}
// snapshotStoreForTest opens a store in a fresh temp directory, inserts one
// page ("page1") with the given title and one text block ("block1") with the
// given text, and exports the store to markdown in a second temp directory.
// It returns the open store and the markdown directory. Any failure aborts
// the test.
func snapshotStoreForTest(t *testing.T, ctx context.Context, title, text string) (*store.Store, string) {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "snapshot.db")
	st, err := store.Open(dbPath)
	if err != nil {
		t.Fatal(err)
	}

	syncedAt := store.NowMS()
	page := store.Page{ID: "page1", Title: title, Alive: true, Source: "test", SyncedAt: syncedAt}
	if err := st.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}
	block := store.Block{ID: "block1", PageID: "page1", ParentID: "page1", Type: "text", Text: text, Alive: true, Source: "test", SyncedAt: syncedAt}
	if err := st.UpsertBlock(ctx, block); err != nil {
		t.Fatal(err)
	}

	mdDir := t.TempDir()
	exporter := markdown.Exporter{Store: st, Dir: mdDir}
	if _, err := exporter.Export(ctx); err != nil {
		t.Fatal(err)
	}
	return st, mdDir
}
// runGitForTest runs `git <args...>` with dir as the working directory. If the
// command fails, the test is aborted with the arguments, the error, and git's
// trimmed combined output.
func runGitForTest(t *testing.T, dir string, args ...string) {
	t.Helper()
	git := exec.Command("git", args...)
	git.Dir = dir
	output, err := git.CombinedOutput()
	if err == nil {
		return
	}
	t.Fatalf("git %s: %v\n%s", strings.Join(args, " "), err, strings.TrimSpace(string(output)))
}
// gitOutputForTest runs `git <args...>` with dir as the working directory and
// returns the command's standard output.
//
// Standard error is collected separately rather than merged into the result,
// so warnings or hints that git prints on a successful run cannot leak into
// the value callers compare against. On failure the test is aborted with the
// arguments, the error, and the trimmed stdout plus stderr.
func gitOutputForTest(t *testing.T, dir string, args ...string) string {
	t.Helper()
	var stderr strings.Builder
	cmd := exec.Command("git", args...)
	cmd.Dir = dir
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		detail := strings.TrimSpace(string(out) + stderr.String())
		t.Fatalf("git %s: %v\n%s", strings.Join(args, " "), err, detail)
	}
	return string(out)
}

View File

@ -56,7 +56,10 @@ func (s *Store) Collection(ctx context.Context, id string) (Collection, error) {
func (s *Store) CollectionPages(ctx context.Context, collectionID string) ([]Page, error) {
rows, err := s.queryContext(ctx, `select id, space_id, parent_id, parent_table, collection_id, title, url, icon, cover,
properties_json, created_time, last_edited_time, alive, source, raw_json, synced_at
from pages where collection_id = ? and alive = 1 order by coalesce(last_edited_time, 0) desc, title`, collectionID)
from pages
where alive = 1
and (collection_id = ? or (parent_id = ? and parent_table in ('collection', 'database', 'data_source')))
order by coalesce(last_edited_time, 0) desc, title`, collectionID, collectionID)
if err != nil {
return nil, err
}
@ -94,7 +97,49 @@ func (s *Store) PageBlocks(ctx context.Context, pageID string) ([]Block, error)
b.Alive = IntBool(alive)
blocks = append(blocks, b)
}
return blocks, rows.Err()
if err := rows.Err(); err != nil {
return nil, err
}
return pageBlocksDisplayOrder(pageID, blocks), nil
}
// pageBlocksDisplayOrder arranges a page's blocks depth-first. Starting from
// the blocks whose ParentID is pageID, each block is emitted followed by its
// descendants, with every sibling group ordered by sortBlockSiblings.
//
// A block whose ID equals pageID is excluded from the walk. If the walk
// reaches nothing, the input slice is returned unchanged. Otherwise, blocks
// the walk never reached are appended after the tree in their incoming order.
// Each block ID is emitted at most once.
func pageBlocksDisplayOrder(pageID string, blocks []Block) []Block {
	byParent := map[string][]Block{}
	for _, b := range blocks {
		if b.ID != pageID {
			byParent[b.ParentID] = append(byParent[b.ParentID], b)
		}
	}
	for _, siblings := range byParent {
		sortBlockSiblings(siblings)
	}

	visited := map[string]struct{}{}
	result := make([]Block, 0, len(blocks))
	var walk func(parentID string)
	walk = func(parentID string) {
		for _, child := range byParent[parentID] {
			if _, dup := visited[child.ID]; dup {
				continue
			}
			visited[child.ID] = struct{}{}
			result = append(result, child)
			walk(child.ID)
		}
	}
	walk(pageID)
	if len(result) == 0 {
		return blocks
	}

	// Keep blocks that are not reachable from the page root instead of
	// dropping them.
	for _, b := range blocks {
		if b.ID == pageID {
			continue
		}
		if _, dup := visited[b.ID]; dup {
			continue
		}
		visited[b.ID] = struct{}{}
		result = append(result, b)
	}
	return result
}
func (s *Store) PageComments(ctx context.Context, pageID string) ([]Comment, error) {
@ -119,6 +164,40 @@ func (s *Store) PageComments(ctx context.Context, pageID string) ([]Comment, err
return comments, rows.Err()
}
// UserNames returns a display label for every row of the users table, keyed
// by user id. The label is the first non-empty value among name, email, and
// id, as chosen by the query's coalesce.
func (s *Store) UserNames(ctx context.Context) (map[string]string, error) {
	const query = `select id, coalesce(nullif(name, ''), nullif(email, ''), id) from users`
	rows, err := s.queryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	labels := map[string]string{}
	for rows.Next() {
		var userID, label string
		if scanErr := rows.Scan(&userID, &label); scanErr != nil {
			return nil, scanErr
		}
		labels[userID] = label
	}
	return labels, rows.Err()
}
// PageTitles returns a label for every page row with alive = 1, keyed by page
// id. The label is the page title, or the id itself when the title is empty.
func (s *Store) PageTitles(ctx context.Context) (map[string]string, error) {
	const query = `select id, coalesce(nullif(title, ''), id) from pages where alive = 1`
	rows, err := s.queryContext(ctx, query)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	titles := map[string]string{}
	for rows.Next() {
		var pageID, label string
		if scanErr := rows.Scan(&pageID, &label); scanErr != nil {
			return nil, scanErr
		}
		titles[pageID] = label
	}
	return titles, rows.Err()
}
func (s *Store) SpaceNames(ctx context.Context) (map[string]string, error) {
rows, err := s.queryContext(ctx, `select id, name from spaces`)
if err != nil {

View File

@ -6,12 +6,11 @@ import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
_ "modernc.org/sqlite"
crawlstore "github.com/vincentkoc/crawlkit/store"
)
const schemaVersion = 1
@ -25,64 +24,34 @@ type Store struct {
}
func Open(path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, err
}
if err := ensureDBFile(path); err != nil {
return nil, err
}
db, err := sql.Open("sqlite", sqliteDSN(path))
base, err := crawlstore.Open(context.Background(), crawlstore.Options{Path: path})
if err != nil {
return nil, err
}
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
db := base.DB()
if err := db.PingContext(context.Background()); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
st := &Store{db: db, path: path}
if err := st.init(context.Background()); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
return st, nil
}
func sqliteDSN(path string) string {
pragmas := "_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)"
if path == ":memory:" {
return "file::memory:?cache=shared&" + pragmas
func OpenReadOnly(path string) (*Store, error) {
base, err := crawlstore.OpenReadOnly(context.Background(), path)
if err != nil {
return nil, err
}
if strings.HasPrefix(path, "file:") {
sep := "?"
if strings.Contains(path, "?") {
sep = "&"
}
return path + sep + pragmas
db := base.DB()
if err := db.PingContext(context.Background()); err != nil {
_ = base.Close()
return nil, err
}
return "file:" + path + "?" + pragmas
}
func ensureDBFile(path string) error {
if path == ":memory:" || strings.HasPrefix(path, "file:") {
return nil
}
if _, err := os.Stat(path); err == nil {
return os.Chmod(path, 0o600)
} else if !errors.Is(err, os.ErrNotExist) {
return err
}
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
if err != nil && !errors.Is(err, os.ErrExist) {
return err
}
if file != nil {
if err := file.Close(); err != nil {
return err
}
}
return nil
return &Store{db: db, path: path}, nil
}
func (s *Store) DB() *sql.DB {

View File

@ -260,6 +260,37 @@ func TestStoreBuildsPageFTSInDisplayTreeOrder(t *testing.T) {
}
}
// TestStoreReturnsPageBlocksInDisplayTreeOrder upserts two root blocks and one
// nested block in an order that matches neither their ids nor the tree, then
// expects PageBlocks to return first root, its child, then the second root.
func TestStoreReturnsPageBlocksInDisplayTreeOrder(t *testing.T) {
	st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
	if err != nil {
		t.Fatal(err)
	}
	defer st.Close()

	ctx := context.Background()
	now := NowMS()
	page := Page{ID: "page1", Title: "Recipe", Alive: true, Source: "test", SyncedAt: now}
	if err := st.UpsertPage(ctx, page); err != nil {
		t.Fatal(err)
	}

	fixtures := []Block{
		{ID: "z-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "third", DisplayOrder: 2, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
		{ID: "a-child", PageID: "page1", ParentID: "a-root", Type: "text", Text: "second", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
		{ID: "a-root", PageID: "page1", ParentID: "page1", Type: "text", Text: "first", DisplayOrder: 1, CreatedTime: now, Alive: true, Source: "test", SyncedAt: now},
	}
	for i := range fixtures {
		if err := st.UpsertBlock(ctx, fixtures[i]); err != nil {
			t.Fatal(err)
		}
	}

	got, err := st.PageBlocks(ctx, "page1")
	if err != nil {
		t.Fatal(err)
	}
	wantIDs := [...]string{"a-root", "a-child", "z-root"}
	inOrder := len(got) == len(wantIDs)
	for i := 0; inOrder && i < len(wantIDs); i++ {
		inOrder = got[i].ID == wantIDs[i]
	}
	if !inOrder {
		t.Fatalf("unexpected block tree order: %+v", got)
	}
}
func TestStoreResolvesPageTeamThroughCollectionParent(t *testing.T) {
st, err := Open(filepath.Join(t.TempDir(), "notcrawl.db"))
if err != nil {

View File

@ -31,6 +31,16 @@ type Summary struct {
Columns int
}
// exportColumn describes one column of an export. Key selects the value for
// each row (a built-in field name or a property key) and Header is the text
// written to the header row.
type exportColumn struct {
	Key, Header string
}
// referenceLabels holds lookup tables for turning referenced ids into
// readable text. Users maps user id to label and Pages maps page id to label.
type referenceLabels struct {
	Users, Pages map[string]string
}
func (e Exporter) Export(ctx context.Context, databaseID string, format Format, w io.Writer) (Summary, error) {
if e.Store == nil {
return Summary{}, fmt.Errorf("missing store")
@ -46,21 +56,29 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
if err != nil {
return Summary{}, err
}
refs, err := e.referenceLabels(ctx)
if err != nil {
return Summary{}, err
}
columns := columnsFor(collection, pages)
headers := make([]string, 0, len(columns))
for _, col := range columns {
headers = append(headers, col.Header)
}
writer := csv.NewWriter(w)
if format == FormatTSV {
writer.Comma = '\t'
} else if format != "" && format != FormatCSV {
return Summary{}, fmt.Errorf("unsupported format %q", format)
}
if err := writer.Write(columns); err != nil {
if err := writer.Write(headers); err != nil {
return Summary{}, err
}
for _, page := range pages {
props := decodeMap(page.PropertiesJSON)
row := make([]string, 0, len(columns))
for _, col := range columns {
switch col {
switch col.Key {
case "page_id":
row = append(row, page.ID)
case "page_title":
@ -68,7 +86,7 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
case "url":
row = append(row, page.URL)
default:
row = append(row, propertyValueText(props[col]))
row = append(row, propertyValueText(props[col.Key], refs))
}
}
if err := writer.Write(row); err != nil {
@ -82,45 +100,95 @@ func (e Exporter) Export(ctx context.Context, databaseID string, format Format,
return Summary{Database: collection.ID, Rows: len(pages), Columns: len(columns)}, nil
}
func columnsFor(collection store.Collection, pages []store.Page) []string {
seen := map[string]bool{"page_id": true, "page_title": true, "url": true}
cols := []string{"page_id", "page_title", "url"}
for _, name := range schemaPropertyNames(collection.SchemaJSON) {
if !seen[name] {
seen[name] = true
cols = append(cols, name)
// referenceLabels loads the user and page label tables from the exporter's
// store. If either lookup fails, it returns an empty referenceLabels together
// with that error.
func (e Exporter) referenceLabels(ctx context.Context) (referenceLabels, error) {
	var (
		labels referenceLabels
		err    error
	)
	if labels.Users, err = e.Store.UserNames(ctx); err != nil {
		return referenceLabels{}, err
	}
	if labels.Pages, err = e.Store.PageTitles(ctx); err != nil {
		return referenceLabels{}, err
	}
	return labels, nil
}
func columnsFor(collection store.Collection, pages []store.Page) []exportColumn {
seenKeys := map[string]bool{"page_id": true, "page_title": true, "url": true}
seenHeaders := map[string]bool{"page_id": true, "page_title": true, "url": true}
cols := []exportColumn{
{Key: "page_id", Header: "page_id"},
{Key: "page_title", Header: "page_title"},
{Key: "url", Header: "url"},
}
for _, prop := range schemaProperties(collection.SchemaJSON) {
if !seenKeys[prop.Key] {
seenKeys[prop.Key] = true
prop.Header = uniqueHeader(prop.Header, prop.Key, seenHeaders)
cols = append(cols, prop)
}
}
var extras []string
var extras []exportColumn
for _, page := range pages {
for name := range decodeMap(page.PropertiesJSON) {
if !seen[name] {
seen[name] = true
extras = append(extras, name)
for key := range decodeMap(page.PropertiesJSON) {
if !seenKeys[key] {
seenKeys[key] = true
extras = append(extras, exportColumn{Key: key, Header: key})
}
}
}
sort.Strings(extras)
sort.Slice(extras, func(i, j int) bool {
return extras[i].Header < extras[j].Header
})
for i := range extras {
extras[i].Header = uniqueHeader(extras[i].Header, extras[i].Key, seenHeaders)
}
return append(cols, extras...)
}
func schemaPropertyNames(raw string) []string {
func schemaProperties(raw string) []exportColumn {
props := decodeMap(raw)
var title []string
var rest []string
for name, value := range props {
var title []exportColumn
var rest []exportColumn
for key, value := range props {
m, ok := value.(map[string]any)
header := key
if ok {
if name, ok := m["name"].(string); ok && strings.TrimSpace(name) != "" {
header = name
}
}
prop := exportColumn{Key: key, Header: header}
if ok && m["type"] == "title" {
title = append(title, name)
title = append(title, prop)
continue
}
rest = append(rest, name)
rest = append(rest, prop)
}
sort.Strings(title)
sort.Strings(rest)
sort.Slice(title, func(i, j int) bool {
return title[i].Header < title[j].Header
})
sort.Slice(rest, func(i, j int) bool {
return rest[i].Header < rest[j].Header
})
return append(title, rest...)
}
// uniqueHeader returns a header not yet recorded in seen and records it.
//
// A blank header (empty or whitespace only) is replaced by key. If the
// header is already taken, the first alternative tried is "header (key)",
// followed by "header (key 2)", "header (key 3)", and so on until an unused
// value is found.
func uniqueHeader(header, key string, seen map[string]bool) string {
	if strings.TrimSpace(header) == "" {
		header = key
	}
	candidate := header
	for attempt := 1; seen[candidate]; attempt++ {
		if attempt == 1 {
			candidate = header + " (" + key + ")"
			continue
		}
		candidate = fmt.Sprintf("%s (%s %d)", header, key, attempt)
	}
	seen[candidate] = true
	return candidate
}
func decodeMap(raw string) map[string]any {
out := map[string]any{}
if strings.TrimSpace(raw) == "" {
@ -130,7 +198,10 @@ func decodeMap(raw string) map[string]any {
return out
}
func propertyValueText(v any) string {
func propertyValueText(v any, refs referenceLabels) string {
if text, ok := desktopValueText(v, refs); ok {
return text
}
m, ok := v.(map[string]any)
if !ok {
return notiontext.Plain(v)
@ -161,11 +232,11 @@ func propertyValueText(v any) string {
case "people", "files":
return joinNamed(m[typ])
case "relation":
return joinIDs(m[typ])
return joinIDs(m[typ], refs)
case "formula":
return formulaText(m["formula"])
return formulaText(m["formula"], refs)
case "rollup":
return rollupText(m["rollup"])
return rollupText(m["rollup"], refs)
case "created_by", "last_edited_by":
return namedObject(m[typ])
case "unique_id":
@ -174,6 +245,111 @@ func propertyValueText(v any) string {
return notiontext.Plain(v)
}
// desktopValueText renders v through desktopPlain and tidies the result: each
// " , " becomes ", " and the text is then passed through
// notiontext.Normalize. It reports false when desktopPlain does not recognise
// the value.
func desktopValueText(v any, refs referenceLabels) (string, bool) {
	plain, recognised := desktopPlain(v, refs)
	if !recognised {
		return "", false
	}
	tidied := strings.ReplaceAll(plain, " , ", ", ")
	return notiontext.Normalize(tidied), true
}
// desktopPlain flattens an array-encoded value into plain text. The boolean
// is false when v, or any nested element, is something other than nil, a
// string, or a []any.
//
//   - nil and the bare string "‣" yield "".
//   - Any other string is returned unchanged.
//   - An empty list yields "".
//   - A list that starts with "‣" and has a second element is rendered from
//     that element by desktopRefListText.
//   - A list that starts with any other non-empty string yields that string;
//     the remaining elements are ignored.
//   - Every other list is flattened element by element and the non-empty
//     pieces are joined with single spaces.
func desktopPlain(v any, refs referenceLabels) (string, bool) {
	if v == nil {
		return "", true
	}
	if s, isString := v.(string); isString {
		if s == "‣" {
			return "", true
		}
		return s, true
	}
	list, isList := v.([]any)
	if !isList {
		return "", false
	}
	if len(list) == 0 {
		return "", true
	}
	if marker, isString := list[0].(string); isString && marker != "" {
		if marker == "‣" && len(list) > 1 {
			return desktopRefListText(list[1], refs), true
		}
		return marker, true
	}
	pieces := make([]string, 0, len(list))
	for _, element := range list {
		piece, recognised := desktopPlain(element, refs)
		if !recognised {
			return "", false
		}
		if piece != "" {
			pieces = append(pieces, piece)
		}
	}
	return strings.Join(pieces, " "), true
}
// desktopRefListText renders a list of reference entries by passing each one
// to desktopRefText and joining the non-empty results with single spaces. A
// value that is not a list is rendered by notiontext.Plain instead.
func desktopRefListText(v any, refs referenceLabels) string {
	entries, isList := v.([]any)
	if !isList {
		return notiontext.Plain(v)
	}
	var joined strings.Builder
	for _, entry := range entries {
		rendered := desktopRefText(entry, refs)
		if rendered == "" {
			continue
		}
		if joined.Len() > 0 {
			joined.WriteString(" ")
		}
		joined.WriteString(rendered)
	}
	return joined.String()
}
// desktopRefText renders one reference entry, a list whose first element is a
// type code:
//
//   - "," yields ",".
//   - "u" and "p" look up the string at index 1 in refs.Users or refs.Pages
//     via labelOrID.
//   - "d" passes the element at index 1 to dateText.
//
// Anything else, including an entry that is not a non-empty list or one that
// lacks the expected second element, is rendered by notiontext.Plain.
func desktopRefText(v any, refs referenceLabels) string {
	entry, isList := v.([]any)
	if !isList || len(entry) == 0 {
		return notiontext.Plain(v)
	}
	kind, _ := entry[0].(string)
	switch kind {
	case ",":
		return ","
	case "u", "p":
		labels := refs.Users
		if kind == "p" {
			labels = refs.Pages
		}
		if id, found := stringAt(entry, 1); found {
			return labelOrID(labels, id)
		}
	case "d":
		if len(entry) > 1 {
			return dateText(entry[1])
		}
	}
	return notiontext.Plain(v)
}
// stringAt returns items[index] when that element exists and is a string.
// It returns "", false for an index outside the slice, including a negative
// one that would otherwise panic, and for an element that is not a string.
func stringAt(items []any, index int) (string, bool) {
	if index < 0 || index >= len(items) {
		return "", false
	}
	s, ok := items[index].(string)
	return s, ok
}
// labelOrID returns the label stored for id, or id itself when there is no
// entry or the stored label is empty.
func labelOrID(labels map[string]string, id string) string {
	label, found := labels[id]
	if !found || label == "" {
		return id
	}
	return label
}
func namedObject(v any) string {
m, ok := v.(map[string]any)
if !ok {
@ -182,6 +358,9 @@ func namedObject(v any) string {
if name, ok := m["name"].(string); ok {
return name
}
if value, ok := m["value"].(string); ok {
return value
}
if id, ok := m["id"].(string); ok {
return id
}
@ -202,7 +381,7 @@ func joinNamed(v any) string {
return strings.Join(parts, ", ")
}
func joinIDs(v any) string {
func joinIDs(v any, refs referenceLabels) string {
items, ok := v.([]any)
if !ok {
return ""
@ -214,7 +393,7 @@ func joinIDs(v any) string {
continue
}
if id, ok := m["id"].(string); ok {
parts = append(parts, id)
parts = append(parts, labelOrID(refs.Pages, id))
}
}
return strings.Join(parts, ", ")
@ -226,14 +405,20 @@ func dateText(v any) string {
return ""
}
start, _ := m["start"].(string)
if start == "" {
start, _ = m["start_date"].(string)
}
end, _ := m["end"].(string)
if end == "" {
end, _ = m["end_date"].(string)
}
if end != "" {
return start + "/" + end
}
return start
}
func formulaText(v any) string {
func formulaText(v any, refs referenceLabels) string {
m, ok := v.(map[string]any)
if !ok {
return ""
@ -252,10 +437,13 @@ func formulaText(v any) string {
case "date":
return dateText(m["date"])
}
if text, ok := desktopValueText(v, refs); ok {
return text
}
return notiontext.Plain(v)
}
func rollupText(v any) string {
func rollupText(v any, refs referenceLabels) string {
m, ok := v.(map[string]any)
if !ok {
return ""
@ -270,12 +458,15 @@ func rollupText(v any) string {
items, _ := m["array"].([]any)
parts := make([]string, 0, len(items))
for _, item := range items {
if text := propertyValueText(item); text != "" {
if text := propertyValueText(item, refs); text != "" {
parts = append(parts, text)
}
}
return strings.Join(parts, ", ")
}
if text, ok := desktopValueText(v, refs); ok {
return text
}
return notiontext.Plain(v)
}

View File

@ -20,13 +20,22 @@ func TestExportDatabaseTSV(t *testing.T) {
now := store.NowMS()
if err := st.UpsertCollection(ctx, store.Collection{
ID: "db1", Name: "Roadmap", Source: "test", SyncedAt: now,
SchemaJSON: `{"Name":{"type":"title"},"Status":{"type":"select"},"Score":{"type":"number"}}`,
SchemaJSON: `{"title":{"name":"Name","type":"title"},"assignee_id":{"name":"Assignee","type":"person"},"due_id":{"name":"Due","type":"date"},"status_id":{"name":"Status","type":"select"},"score_id":{"name":"Score","type":"number"}}`,
}); err != nil {
t.Fatal(err)
}
if err := st.UpsertUser(ctx, store.User{ID: "user1", Name: "Claire Pena", Source: "test", SyncedAt: now}); err != nil {
t.Fatal(err)
}
if err := st.UpsertPage(ctx, store.Page{
ID: "page1", CollectionID: "db1", Title: "Ship", URL: "https://example.com/ship", Alive: true, Source: "test", SyncedAt: now,
PropertiesJSON: `{"Name":{"type":"title","title":[{"plain_text":"Ship"}]},"Status":{"type":"select","select":{"name":"Done"}},"Score":{"type":"number","number":7}}`,
PropertiesJSON: `{"title":{"type":"title","title":[{"plain_text":"Ship"}]},"status_id":{"type":"select","select":{"name":"Done"}},"score_id":{"type":"number","number":7}}`,
}); err != nil {
t.Fatal(err)
}
if err := st.UpsertPage(ctx, store.Page{
ID: "page2", ParentID: "db1", ParentTable: "collection", Title: "Draft", URL: "https://example.com/draft", Alive: true, Source: "test", SyncedAt: now,
PropertiesJSON: `{"title":[["Draft"]],"assignee_id":[["‣",[["u","user1"]]]],"due_id":[["‣",[["d",{"type":"date","start_date":"2025-05-23"}]]]],"status_id":[["In progress"]],"score_id":[["3"]]}`,
}); err != nil {
t.Fatal(err)
}
@ -35,11 +44,15 @@ func TestExportDatabaseTSV(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if s.Rows != 1 {
t.Fatalf("expected one row, got %d", s.Rows)
if s.Rows != 2 {
t.Fatalf("expected two rows, got %d", s.Rows)
}
got := out.String()
for _, want := range []string{"page_id\tpage_title\turl\tName\tScore\tStatus", "page1\tShip\thttps://example.com/ship\tShip\t7\tDone"} {
for _, want := range []string{
"page_id\tpage_title\turl\tName\tAssignee\tDue\tScore\tStatus",
"page1\tShip\thttps://example.com/ship\tShip\t\t\t7\tDone",
"page2\tDraft\thttps://example.com/draft\tDraft\tClaire Pena\t2025-05-23\t3\tIn progress",
} {
if !strings.Contains(got, want) {
t.Fatalf("missing %q in:\n%s", want, got)
}