merge: use crawlkit infrastructure

* feat/use-crawlkit: (50 commits)
  fix(release): update version ldflag module path
  chore(deps): use crawlkit v0.4.0
  fix(tui): hydrate discord roots without thread scans
  fix(tui): limit discord thread hydration
  fix(tui): hydrate discord reply context
  fix(share): forward snapshot import progress
  fix(tui): browse newest discord messages
  fix(tui): show discord attachment details
  feat(tui): refresh discord archive rows
  fix(tui): resolve discord inline mentions
  fix(tui): render discord mention names
  docs: note shared tui polish
  fix(tui): document shared controls
  fix(tui): expose discord message details
  fix(tui): add Discord message URLs
  docs(tui): note dm pane labels
  fix(tui): label discord direct message panes
  fix(tui): use compact-pane crawlkit
  fix(tui): pick up shared detail renderer
  fix(sync): include progress percentages
  ...
This commit is contained in:
Vincent Koc 2026-05-05 18:20:49 -07:00
commit ebb41dabfd
No known key found for this signature in database
22 changed files with 1334 additions and 274 deletions

View File

@ -91,7 +91,19 @@ jobs:
}'
- name: Build
run: go build ./cmd/discrawl
run: go build -o bin/discrawl ./cmd/discrawl
- name: Smoke test CLI control surface
run: |
set -euo pipefail
output="$(./bin/discrawl help)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "metadata"
printf '%s' "$output" | grep -q "tui"
test -n "$(./bin/discrawl --version)"
./bin/discrawl metadata --json | grep -q '"schema_version"'
./bin/discrawl status --json | grep -q '"databases"'
./bin/discrawl tui --json | grep -q '^\['
deps:
runs-on: ubuntu-latest

View File

@ -12,7 +12,7 @@ builds:
env:
- CGO_ENABLED=0
ldflags:
- -s -w -X github.com/steipete/discrawl/internal/cli.version={{ .Version }}
- -s -w -X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}
targets:
- darwin_amd64
- darwin_arm64

View File

@ -26,10 +26,24 @@
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
### Fixes
- Label direct-message TUI panes as direct messages instead of raw `@me` guild rows, keeping DM channel/person context readable.
- Inherit shared crawlkit TUI improvements for newest-first startup, count-header sorting, selected-message-first chat detail panes, and gitcrawl-style metadata labels.
- Surface Discord attachment filenames and extracted text in TUI detail panes instead of only showing `attachments=true`.
## 0.6.3 - 2026-05-01
### Changes
- Add crawlkit control metadata/status surfaces with `metadata --json`, `status --json`, and `doctor --json`.
- Add `tap` and `cache-import` as public desktop-cache import names while keeping `wiretap` as a documented legacy alias.
- Add `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared `crawlkit/tui` package.
- Render TUI rows with compact panes and expose pinned, attachment, reply, channel, and author metadata in the detail pane.
### Fixes
- Keep status and TUI reads safe for fresh or missing local databases without triggering git-share auto-update.
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.

View File

@ -22,6 +22,7 @@ Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror.
- tails Gateway events for live updates, with periodic repair syncs
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
- publishes and imports private Git-backed archive snapshots for org-wide read access
- browses stored messages and local DMs in a terminal archive UI
- supports Git-only read mode with no Discord credentials on reader machines
- generates backup README activity reports, with optional AI-written field notes
- exposes read-only SQL for ad hoc analysis
@ -159,6 +160,17 @@ discrawl messages --channel general --hours 24
## Commands
### `tui`
Opens the local terminal archive browser for stored messages.
```bash
discrawl tui
discrawl tui --guild 123456789012345678 --channel general
discrawl tui --dm
discrawl --json tui --limit 50
```
### `init`
Creates the local config and discovers accessible guilds.
@ -683,6 +695,7 @@ go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
go test ./... -coverprofile=/tmp/discrawl.cover
go tool cover -func=/tmp/discrawl.cover | tail -n 1
go build ./cmd/discrawl
go run ./cmd/discrawl help | grep tui
```
Target coverage is `>= 85%`.

28
go.mod
View File

@ -5,26 +5,50 @@ go 1.26.2
require (
github.com/bwmarrin/discordgo v0.29.0
github.com/gorilla/websocket v1.5.3
github.com/pelletier/go-toml/v2 v2.3.1
github.com/stretchr/testify v1.11.1
github.com/zalando/go-keyring v0.2.8
golang.org/x/sys v0.43.0
golang.org/x/text v0.36.0
modernc.org/sqlite v1.50.0
)
require (
github.com/charmbracelet/bubbles v1.0.0 // indirect
github.com/clipperhouse/displaywidth v0.9.0 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
modernc.org/sqlite v1.50.0 // indirect
)
require (
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/bubbletea v1.3.10 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
github.com/charmbracelet/lipgloss v1.1.0 // indirect
github.com/charmbracelet/x/ansi v0.11.6 // indirect
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
github.com/danieljoos/wincred v1.2.3 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/godbus/dbus/v5 v5.2.2 // indirect
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
github.com/mattn/go-isatty v0.0.22 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/vincentkoc/crawlkit v0.4.0
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/crypto v0.50.0 // indirect
golang.org/x/tools v0.44.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect

45
go.sum
View File

@ -1,5 +1,27 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
@ -7,6 +29,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 h1:EwtI+Al+DeppwYX2oXJCETMO23COyaKGP6fHVpkpWpg=
@ -25,8 +49,20 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
@ -36,23 +72,32 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
github.com/zalando/go-keyring v0.2.8/go.mod h1:tsMo+VpRq5NGyKfxoBVjCuMrG47yj8cmakZDO5QGii0=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=

View File

@ -17,6 +17,7 @@ import (
"github.com/openclaw/discrawl/internal/discord"
"github.com/openclaw/discrawl/internal/discorddesktop"
"github.com/openclaw/discrawl/internal/embed"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
@ -314,16 +315,37 @@ func (r *runtime) runWiretap(args []string) error {
}
func (r *runtime) runStatus(args []string) error {
if len(args) != 0 {
fs := flag.NewFlagSet("status", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("status takes no arguments"))
}
if *jsonOut {
r.json = true
}
dbPath, err := config.ExpandPath(r.cfg.DBPath)
if err != nil {
return configErr(err)
}
status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()}
if r.store != nil {
status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
}
}
if r.json {
needsUpdate := false
if r.store != nil && r.cfg.ShareEnabled() {
if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil {
needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter)
}
}
return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate))
}
return r.print(status)
}
@ -384,9 +406,18 @@ func (r *runtime) runEmbed(args []string) error {
}
func (r *runtime) runDoctor(args []string) error {
if len(args) != 0 {
fs := flag.NewFlagSet("doctor", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("doctor takes no arguments"))
}
if *jsonOut {
r.json = true
}
report := map[string]any{
"config_path": r.configPath,
}

View File

@ -47,6 +47,10 @@ func ExitCode(err error) int {
}
func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
printUsage(stdout)
return nil
}
global := flag.NewFlagSet("discrawl", flag.ContinueOnError)
global.SetOutput(io.Discard)
configPath := global.String("config", "", "")
@ -66,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
return nil
}
rest := global.Args()
if len(rest) == 0 || rest[0] == "help" {
if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
printUsage(stdout)
return nil
}
if rest[0] == "version" {
_, _ = io.WriteString(stdout, version+"\n")
return nil
}
level := slog.LevelInfo
if *quiet {
level = slog.LevelError
@ -129,6 +137,8 @@ type attachmentTextConfigurer interface {
func (r *runtime) dispatch(rest []string) error {
switch rest[0] {
case "metadata":
return r.runMetadata(rest[1:])
case "init":
return r.runInit(rest[1:])
case "sync":
@ -141,9 +151,16 @@ func (r *runtime) dispatch(rest []string) error {
return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) })
case "wiretap":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "tap", "cache-import":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "search":
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
case "tui":
if hasHelpArg(rest[1:]) {
return r.runTUI(rest[1:])
}
return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) })
case "messages":
if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") {
return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) })
@ -167,7 +184,7 @@ func (r *runtime) dispatch(rest []string) error {
case "channels":
return r.withLocalStoreLocked(true, func() error { return r.runChannels(rest[1:]) })
case "status":
return r.withLocalStoreLocked(true, func() error { return r.runStatus(rest[1:]) })
return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) })
case "report":
return r.withLocalStoreLocked(true, func() error { return r.runReport(rest[1:]) })
case "publish":
@ -249,6 +266,35 @@ func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn f
return fn()
}
// withLocalStoreReadOnly loads the configuration, opens the archive database
// read-only, and runs fn with r.cfg and r.store populated.
//
// A missing config file is not an error: the normalized built-in defaults are
// used instead. A missing database file is not an error either: fn runs with
// r.store set to nil so callers can report an empty archive. Any other config
// failure is wrapped with configErr and any other open failure with dbErr.
// When the store was opened it is closed after fn returns.
func (r *runtime) withLocalStoreReadOnly(fn func() error) error {
	loaded, loadErr := config.Load(r.configPath)
	switch {
	case loadErr == nil:
		// Config file was read successfully; use it as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		loaded = config.Default()
		if normErr := loaded.Normalize(); normErr != nil {
			return configErr(normErr)
		}
	default:
		return configErr(loadErr)
	}

	path, pathErr := config.ExpandPath(loaded.DBPath)
	if pathErr != nil {
		return configErr(pathErr)
	}
	r.cfg = loaded

	var openErr error
	r.store, openErr = store.OpenReadOnly(r.ctx, path)
	switch {
	case openErr == nil:
		defer func() { _ = r.store.Close() }()
	case errors.Is(openErr, os.ErrNotExist):
		// No database yet: let fn decide how to present an empty archive.
		r.store = nil
	default:
		return dbErr(openErr)
	}
	return fn()
}
func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error {
return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn)
}

View File

@ -76,6 +76,21 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
NormalizedContent: "panic locked database",
RawJSON: `{}`,
}))
require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: "g2", Name: "Other Guild", RawJSON: `{}`}))
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "random", RawJSON: `{}`}))
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
ID: "m-other",
GuildID: "g2",
ChannelID: "c2",
ChannelName: "random",
AuthorID: "u2",
AuthorName: "Outside",
MessageType: 0,
CreatedAt: time.Now().UTC().Add(-time.Hour).Format(time.RFC3339Nano),
Content: "outside default guild",
NormalizedContent: "outside default guild",
RawJSON: `{}`,
}))
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
ID: "m2",
GuildID: "g1",
@ -137,6 +152,35 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
require.NotEmpty(t, out.String())
}
before, err := os.ReadFile(dbPath)
require.NoError(t, err)
var out bytes.Buffer
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "tui", "--limit", "5"}, &out, &bytes.Buffer{}))
var rows []map[string]any
require.NoError(t, json.Unmarshal(out.Bytes(), &rows))
require.NotEmpty(t, rows)
require.Equal(t, "panic locked database", rows[0]["title"])
require.Equal(t, "discord", rows[0]["source"])
require.Equal(t, "message", rows[0]["kind"])
require.Equal(t, "Guild", rows[0]["scope"])
require.Equal(t, "general", rows[0]["container"])
require.Equal(t, "https://discord.com/channels/g1/c1/m1", rows[0]["url"])
after, err := os.ReadFile(dbPath)
require.NoError(t, err)
require.Equal(t, before, after, "tui --json should not mutate the database")
}
func TestTUIHelpReturnsUsage(t *testing.T) {
var stdout bytes.Buffer
var stderr bytes.Buffer
require.NoError(t, Run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr))
require.Contains(t, stdout.String(), "Usage of tui:")
require.Contains(t, stdout.String(), "-limit")
require.Contains(t, stdout.String(), "right-click")
require.Contains(t, stdout.String(), "# jump")
require.Empty(t, stderr.String())
}
func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
@ -183,6 +227,53 @@ func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
require.Contains(t, out.String(), "secret DM launch plan")
}
// TestDiscordTUIRowsIncludePaneMetadata checks the tui.Row values that
// discordTUIRows produces for a direct-message row with attachments and for a
// guild row whose author has an ID but no name.
func TestDiscordTUIRowsIncludePaneMetadata(t *testing.T) {
// First fixture: a pinned DM reply under the "@me" guild with an attachment.
// Content and DisplayContent differ so the assertions can tell which is used.
rows := discordTUIRows([]store.MessageRow{{
MessageID: "m1",
GuildID: "@me",
GuildName: "Discord Direct Messages",
ChannelID: "c1",
ChannelName: "Vincent K",
AuthorID: "u1",
AuthorName: "Peter",
Content: "hello from desktop",
DisplayContent: "hello from Vincent",
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
ReplyToMessage: "m0",
HasAttachments: true,
AttachmentNames: "trace.txt",
AttachmentText: "stack trace line one",
Pinned: true,
}})
require.Len(t, rows, 1)
// Title, Detail and Text are expected to carry DisplayContent, not Content.
require.Equal(t, "hello from Vincent", rows[0].Title)
require.Contains(t, rows[0].Detail, "hello from Vincent")
// The detail pane is expected to include an attachments section with the extracted text.
require.Contains(t, rows[0].Detail, "Attachments")
require.Contains(t, rows[0].Detail, "stack trace line one")
require.Equal(t, "hello from Vincent", rows[0].Text)
// "@me" rows are expected under the direct-message scope, labelled by channel name and tagged "dm".
require.Equal(t, "Direct messages", rows[0].Scope)
require.Equal(t, "Vincent K", rows[0].Container)
require.Contains(t, rows[0].Tags, "dm")
// Boolean and reference metadata is expected as string fields.
require.Equal(t, "true", rows[0].Fields["attachments"])
require.Equal(t, "trace.txt", rows[0].Fields["attachment_names"])
require.Equal(t, "true", rows[0].Fields["pinned"])
require.Equal(t, "m0", rows[0].Fields["reply_to"])
require.Equal(t, "@me", rows[0].Fields["guild_id"])
// Second fixture: a guild row from the desktop cache with no author name.
rows = discordTUIRows([]store.MessageRow{{
MessageID: "m2",
GuildID: "g1",
ChannelID: "c2",
AuthorID: "439223656200273932",
Content: "desktop-only author",
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
Source: "discord_desktop",
}})
// Without an author name the label is expected to be a shortened form of the author ID.
require.Equal(t, "user:439223...3932", rows[0].Author)
// A DM row without a channel name is expected to be labelled "DM <channel id>".
require.Equal(t, "DM c2", discordContainerLabel(store.MessageRow{GuildID: "@me", ChannelID: "c2"}))
// The row's Source is expected to appear among its tags.
require.Contains(t, rows[0].Tags, "discord_desktop")
}
func TestParseMessageWindow(t *testing.T) {
rt := &runtime{now: func() time.Time {
return time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)

View File

@ -0,0 +1,96 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"os"
"time"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
"github.com/vincentkoc/crawlkit/control"
)
// runMetadata prints the crawlkit control manifest that describes discrawl:
// branding, default paths, capabilities, privacy flags, and the commands a
// controlling application may invoke.
//
// The only accepted argument is --json, which switches the runtime to JSON
// output before printing. Positional arguments and unknown flags are reported
// as usage errors.
func (r *runtime) runMetadata(args []string) error {
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var wantJSON bool
	flags.BoolVar(&wantJSON, "json", false, "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() > 0 {
		return usageErr(errors.New("metadata takes flags only"))
	}
	if wantJSON {
		r.json = true
	}

	// Paths are reported from the built-in defaults, not from a loaded config.
	defaults := config.Default()

	// report builds a read-only JSON command that takes --json after the subcommand.
	report := func(title, sub string) control.Command {
		return control.Command{Title: title, Argv: []string{"discrawl", sub, "--json"}, JSON: true}
	}
	// mutating builds a state-changing JSON command that takes the global --json flag.
	mutating := func(title, sub string) control.Command {
		return control.Command{Title: title, Argv: []string{"discrawl", "--json", sub}, JSON: true, Mutates: true}
	}
	legacyWiretap := mutating("Legacy desktop cache import", "wiretap")
	legacyWiretap.Legacy = true
	legacyWiretap.Deprecated = true

	manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl")
	manifest.Description = "Local-first Discord archive crawler."
	manifest.Branding = control.Branding{
		SymbolName:       "bubble.left.and.bubble.right.fill",
		AccentColor:      "#5865f2",
		BundleIdentifier: "com.hnc.Discord",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
		DefaultShare:    defaults.Share.RepoPath,
	}
	manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: true,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"discord", "desktop-cache", "sqlite", "git-share"},
	}
	manifest.Commands = map[string]control.Command{
		"status":       report("Status", "status"),
		"doctor":       report("Doctor", "doctor"),
		"sync":         mutating("Sync", "sync"),
		"tap":          mutating("Import desktop cache", "tap"),
		"cache-import": mutating("Import desktop cache", "cache-import"),
		"wiretap":      legacyWiretap,
		"tui":          {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}},
		"tui-json":     report("Terminal browser rows", "tui"),
		"publish":      mutating("Publish share", "publish"),
		"subscribe":    mutating("Subscribe share", "subscribe"),
		"update":       mutating("Update share", "update"),
	}
	return r.print(manifest)
}
// controlStatus converts a store.Status into the crawlkit control.Status shape.
//
// The result carries the record counts, a one-line summary, the config and
// database paths, the last sync time (RFC 3339, UTC) when one is recorded, the
// database and WAL sizes, and the share settings from cfg together with the
// caller-supplied shareNeedsUpdate flag. State is always reported as "current".
func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status {
	tallies := []struct {
		id, label string
		value     int64
	}{
		{"guilds", "Guilds", int64(status.GuildCount)},
		{"channels", "Channels", int64(status.ChannelCount)},
		{"threads", "Threads", int64(status.ThreadCount)},
		{"messages", "Messages", int64(status.MessageCount)},
		{"members", "Members", int64(status.MemberCount)},
		{"embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)},
	}
	counts := make([]control.Count, 0, len(tallies))
	for _, entry := range tallies {
		counts = append(counts, control.NewCount(entry.id, entry.label, entry.value))
	}

	summary := fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount)
	out := control.NewStatus("discrawl", summary)
	out.State = "current"
	out.ConfigPath = configPath
	out.DatabasePath = status.DBPath
	out.Counts = counts
	if synced := status.LastSyncAt; !synced.IsZero() {
		out.LastSyncAt = synced.UTC().Format(time.RFC3339)
	}

	archive := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, counts)
	out.DatabaseBytes = archive.Bytes
	// The WAL file sits next to the database file with a "-wal" suffix.
	out.WALBytes = fileSize(status.DBPath + "-wal")
	out.Databases = []control.Database{archive}

	out.Share = &control.Share{
		Enabled:     cfg.ShareEnabled(),
		RepoPath:    cfg.Share.RepoPath,
		Remote:      cfg.Share.Remote,
		Branch:      cfg.Share.Branch,
		NeedsUpdate: shareNeedsUpdate,
	}
	return out
}
// fileSize returns the size in bytes that os.Stat reports for path, or 0 when
// the path cannot be stat'ed (for example because it does not exist).
func fileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}

View File

@ -100,11 +100,16 @@ Usage:
discrawl [global flags] <command> [args]
Commands:
metadata
version
init
sync
tail
tap
cache-import
wiretap
search
tui
messages
digest
analytics

View File

@ -96,3 +96,12 @@ func hasBoolFlag(args []string, name string) bool {
}
return false
}
// hasHelpArg reports whether any element of args is exactly one of the help
// spellings "help", "--help", or "-h".
func hasHelpArg(args []string) bool {
	for i := range args {
		switch args[i] {
		case "help", "--help", "-h":
			return true
		}
	}
	return false
}

View File

@ -0,0 +1,239 @@
package cli
import (
"context"
"errors"
"flag"
"fmt"
"strings"
"github.com/vincentkoc/crawlkit/tui"
"github.com/openclaw/discrawl/internal/store"
)
// runTUI starts the terminal archive browser, or prints browser rows as JSON
// when JSON output is enabled.
//
// Flags select the channel, author, guild scope, DM mode, row limit, and
// whether empty messages are included. A help request prints the flag usage
// and the shared controls help to stdout and returns nil; parse failures are
// reported on stderr and returned as usage errors. When no store is open
// (r.store is nil) the browser is started without rows so it shows the empty
// message. Otherwise the rows are loaded once up front and the same query is
// offered to the browser as its refresh callback.
func (r *runtime) runTUI(args []string) error {
	flags := flag.NewFlagSet("tui", flag.ContinueOnError)
	// Explicit help goes to stdout; everything else the flag set prints goes to stderr.
	if hasHelpArg(args) {
		flags.SetOutput(r.stdout)
	} else {
		flags.SetOutput(r.stderr)
	}
	flags.Usage = func() {
		w := flags.Output()
		_, _ = fmt.Fprintln(w, "Usage of tui:")
		flags.PrintDefaults()
		_, _ = fmt.Fprintln(w)
		_, _ = fmt.Fprintln(w, tui.ControlsHelp())
	}

	var (
		channel      string
		author       string
		limit        int
		includeEmpty bool
		dm           bool
		guildList    string
		guild        string
		wantJSON     bool
	)
	flags.StringVar(&channel, "channel", "", "channel id")
	flags.StringVar(&author, "author", "", "author/user id")
	flags.IntVar(&limit, "limit", 200, "row limit")
	flags.BoolVar(&includeEmpty, "include-empty", false, "include empty messages")
	flags.BoolVar(&dm, "dm", false, "browse direct messages")
	flags.StringVar(&guildList, "guilds", "", "comma-separated guild ids")
	flags.StringVar(&guild, "guild", "", "guild id")
	flags.BoolVar(&wantJSON, "json", false, "write browser rows as JSON")

	// A bare "help" word is not a flag, so it is handled before parsing.
	if len(args) == 1 && args[0] == "help" {
		flags.Usage()
		return nil
	}
	switch err := flags.Parse(args); {
	case err == nil:
	case errors.Is(err, flag.ErrHelp):
		// The flag package has already printed the usage.
		return nil
	default:
		return usageErr(err)
	}
	if wantJSON {
		r.json = true
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("tui takes flags only"))
	}
	if limit <= 0 {
		return usageErr(errors.New("tui --limit must be positive"))
	}
	guildIDs, err := r.resolveTUIGuilds(dm, guild, guildList)
	if err != nil {
		return usageErr(err)
	}

	browse := tui.BrowseOptions{
		AppName:        "discrawl",
		Title:          "discrawl archive",
		EmptyMessage:   "discrawl has no local messages yet",
		JSON:           r.json,
		Layout:         tui.LayoutChat,
		SourceKind:     r.archiveSourceKind(),
		SourceLocation: r.archiveSourceLocation(),
		Stdout:         r.stdout,
	}
	if r.store == nil {
		return tui.Browse(r.ctx, browse)
	}

	// fetch runs the message query for the selected scope and maps it to browser rows.
	fetch := func() ([]tui.Row, error) {
		messages, queryErr := r.store.ListMessagesWithThreadContext(r.ctx, store.MessageListOptions{
			GuildIDs:     guildIDs,
			Channel:      channel,
			Author:       author,
			Last:         limit,
			IncludeEmpty: includeEmpty,
		})
		if queryErr != nil {
			return nil, queryErr
		}
		return discordTUIRows(messages), nil
	}
	initial, err := fetch()
	if err != nil {
		return err
	}
	browse.Rows = initial
	browse.Refresh = func(context.Context) ([]tui.Row, error) { return fetch() }
	return tui.Browse(r.ctx, browse)
}
// resolveTUIGuilds picks the guild scope for the TUI browser.
//
// The explicit scope computed by directMessageGuildScope wins whenever it
// returns an error, when dm is set, or when it yields at least one guild id.
// Otherwise the configured default guild (if any) is used; with no default
// the result is a nil slice and a nil error.
func (r *runtime) resolveTUIGuilds(dm bool, guild, guilds string) ([]string, error) {
	scope, err := directMessageGuildScope(dm, guild, guilds)
	switch {
	case err != nil:
		return scope, err
	case dm, len(scope) > 0:
		return scope, nil
	}
	fallback := r.cfg.EffectiveDefaultGuildID()
	if fallback == "" {
		return nil, nil
	}
	return []string{fallback}, nil
}
// archiveSourceKind reports tui.SourceRemote when a non-blank share remote is
// configured and tui.SourceLocal otherwise.
func (r *runtime) archiveSourceKind() string {
	remote := strings.TrimSpace(r.cfg.Share.Remote)
	if remote == "" {
		return tui.SourceLocal
	}
	return tui.SourceRemote
}
// archiveSourceLocation returns the configured share remote (exactly as
// stored, without trimming) when it is non-blank, and the database path
// otherwise.
func (r *runtime) archiveSourceLocation() string {
	remote := r.cfg.Share.Remote
	if strings.TrimSpace(remote) == "" {
		return r.cfg.DBPath
	}
	return remote
}
// discordTUIRows converts stored message rows into generic TUI rows.
//
// The row title is the trimmed display content, falling back to the result of
// firstNonEmpty over the trimmed attachment text and the message id. Tags
// always start with the guild and channel ids, gain "dm" for the "@me" guild,
// and gain the row source when one is set.
func discordTUIRows(rows []store.MessageRow) []tui.Row {
	out := make([]tui.Row, 0, len(rows))
	for i := range rows {
		msg := rows[i]
		text := discordDisplayContent(msg)

		heading := strings.TrimSpace(text)
		if heading == "" {
			heading = firstNonEmpty(strings.TrimSpace(msg.AttachmentText), msg.MessageID)
		}

		labels := []string{msg.GuildID, msg.ChannelID}
		if msg.GuildID == "@me" {
			labels = append(labels, "dm")
		}
		if msg.Source != "" {
			labels = append(labels, msg.Source)
		}

		// Extra key/value details exposed alongside the row.
		fields := map[string]string{
			"attachment_names": msg.AttachmentNames,
			"attachments":      boolString(msg.HasAttachments),
			"author_id":        msg.AuthorID,
			"channel_id":       msg.ChannelID,
			"guild_id":         msg.GuildID,
			"pinned":           boolString(msg.Pinned),
			"reply_to":         msg.ReplyToMessage,
			"source":           msg.Source,
		}

		item := tui.Row{
			Source:    "discord",
			Kind:      "message",
			ID:        msg.MessageID,
			ParentID:  msg.ReplyToMessage,
			Scope:     discordScopeLabel(msg),
			Container: discordContainerLabel(msg),
			Author:    discordAuthorLabel(msg),
			Title:     heading,
			Text:      text,
			Detail:    discordDetailContent(msg, text),
			URL:       discordMessageURL(msg),
			CreatedAt: formatTime(msg.CreatedAt),
			Tags:      labels,
			Fields:    fields,
		}
		out = append(out, item)
	}
	return out
}
// discordDetailContent builds the detail-pane text for a message: the trimmed
// content, followed (after a blank line) by an "Attachments" section holding
// the trimmed attachment text. Either part is omitted when blank, and the
// result is empty when both are.
func discordDetailContent(row store.MessageRow, content string) string {
	body := strings.TrimSpace(content)
	attachments := strings.TrimSpace(row.AttachmentText)
	switch {
	case body != "" && attachments != "":
		return body + "\n\n" + "Attachments\n" + attachments
	case body != "":
		return body
	case attachments != "":
		return "Attachments\n" + attachments
	default:
		return ""
	}
}
// discordDisplayContent prefers the trimmed DisplayContent of a row and falls
// back to the untrimmed raw Content when the display form is blank.
func discordDisplayContent(row store.MessageRow) string {
	resolved := strings.TrimSpace(row.DisplayContent)
	if resolved == "" {
		return row.Content
	}
	return resolved
}
// discordMessageURL builds the discord.com link for a message from its
// trimmed guild, channel, and message ids. It returns "" when any of the
// three is blank.
func discordMessageURL(row store.MessageRow) string {
	parts := []string{row.GuildID, row.ChannelID, row.MessageID}
	for i, part := range parts {
		part = strings.TrimSpace(part)
		if part == "" {
			return ""
		}
		parts[i] = part
	}
	return "https://discord.com/channels/" + strings.Join(parts, "/")
}
// discordScopeLabel names the scope a message belongs to: a fixed
// "Direct messages" label for the "@me" guild, otherwise whatever
// firstNonEmpty selects from the guild name and guild id.
func discordScopeLabel(row store.MessageRow) string {
	switch row.GuildID {
	case "@me":
		return "Direct messages"
	default:
		return firstNonEmpty(row.GuildName, row.GuildID)
	}
}
// discordContainerLabel names the channel a message lives in. The channel
// name is offered first; the fallback is the channel id, or for the "@me"
// guild a "DM " prefix followed by the compacted channel id.
func discordContainerLabel(row store.MessageRow) string {
	fallback := row.ChannelID
	if row.GuildID == "@me" {
		fallback = "DM " + compactDiscordID(row.ChannelID)
	}
	return firstNonEmpty(row.ChannelName, fallback)
}
// discordAuthorLabel returns the trimmed author name when present, otherwise
// "user:" plus the compacted author id, otherwise "".
func discordAuthorLabel(row store.MessageRow) string {
	name := strings.TrimSpace(row.AuthorName)
	id := strings.TrimSpace(row.AuthorID)
	switch {
	case name != "":
		return name
	case id != "":
		return "user:" + compactDiscordID(id)
	}
	return ""
}
// compactDiscordID shortens a long id for display as "<first 6>...<last 4>".
//
// The id is trimmed first. It is returned unchanged unless abbreviating would
// actually make it shorter: the abbreviated form is always 13 bytes, so ids of
// 13 bytes or fewer are kept whole (previously 11-12 byte ids grew longer and
// 13-byte ids lost characters for no gain). Slicing is byte-based.
func compactDiscordID(id string) string {
	const (
		headLen  = 6
		tailLen  = 4
		ellipsis = "..."
	)
	id = strings.TrimSpace(id)
	if len(id) <= headLen+len(ellipsis)+tailLen {
		return id
	}
	return id[:headLen] + ellipsis + id[len(id)-tailLen:]
}
// boolString renders a flag as "true" when set and as the empty string when
// not set.
func boolString(value bool) string {
	label := ""
	if value {
		label = "true"
	}
	return label
}

View File

@ -9,7 +9,7 @@ import (
"strings"
"time"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/vincentkoc/crawlkit/config"
)
const (
@ -85,14 +85,25 @@ type TokenResolution struct {
Path string
}
var appConfig = crawlconfig.App{Name: "discrawl", ConfigEnv: DefaultConfigEnv, BaseDir: "~/.discrawl", LegacyBaseDir: "~/.discrawl"}
func Default() Config {
home, _ := os.UserHomeDir()
base := filepath.Join(home, ".discrawl")
paths, err := appConfig.DefaultPaths()
if err != nil {
base := filepath.Join(home, ".discrawl")
paths = crawlconfig.Paths{
DBPath: filepath.Join(base, "discrawl.db"),
CacheDir: filepath.Join(base, "cache"),
LogDir: filepath.Join(base, "logs"),
ShareDir: filepath.Join(base, "share"),
}
}
return Config{
Version: 1,
DBPath: filepath.Join(base, "discrawl.db"),
CacheDir: filepath.Join(base, "cache"),
LogDir: filepath.Join(base, "logs"),
DBPath: paths.DBPath,
CacheDir: paths.CacheDir,
LogDir: paths.LogDir,
DefaultGuildID: "",
Discord: DiscordConfig{
TokenSource: "env",
@ -124,7 +135,7 @@ func Default() Config {
},
},
Share: ShareConfig{
RepoPath: filepath.Join(base, "share"),
RepoPath: paths.ShareDir,
Branch: "main",
AutoUpdate: true,
StaleAfter: "15m",
@ -145,14 +156,12 @@ func defaultSyncConcurrency() int {
}
func ResolvePath(flagPath string) string {
if strings.TrimSpace(flagPath) != "" {
return flagPath
path, err := appConfig.ResolveConfigPath(flagPath)
if err != nil {
home, _ := os.UserHomeDir()
return filepath.Join(home, ".discrawl", "config.toml")
}
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
return envPath
}
home, _ := os.UserHomeDir()
return filepath.Join(home, ".discrawl", "config.toml")
return path
}
func Load(path string) (Config, error) {
@ -161,13 +170,9 @@ func Load(path string) (Config, error) {
if err != nil {
return Config{}, err
}
data, err := os.ReadFile(expanded)
if err != nil {
if err := crawlconfig.LoadTOML(expanded, &cfg); err != nil {
return Config{}, err
}
if err := toml.Unmarshal(data, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Normalize(); err != nil {
return Config{}, err
}
@ -182,14 +187,7 @@ func Write(path string, cfg Config) error {
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(expanded), 0o755); err != nil {
return fmt.Errorf("mkdir config dir: %w", err)
}
data, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("marshal config: %w", err)
}
return os.WriteFile(expanded, data, 0o600)
return crawlconfig.WriteTOML(expanded, cfg, 0o600)
}
func (c *Config) Normalize() error {
@ -343,35 +341,18 @@ func (c Config) ShareEnabled() bool {
}
func EnsureRuntimeDirs(cfg Config) error {
paths := []string{cfg.CacheDir, cfg.LogDir, filepath.Dir(cfg.DBPath)}
for _, path := range paths {
expanded, err := ExpandPath(path)
if err != nil {
return err
}
if err := os.MkdirAll(expanded, 0o755); err != nil {
return fmt.Errorf("mkdir %s: %w", expanded, err)
}
}
return nil
return crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
DBPath: cfg.DBPath,
CacheDir: cfg.CacheDir,
LogDir: cfg.LogDir,
})
}
func ExpandPath(path string) (string, error) {
if strings.TrimSpace(path) == "" {
return "", errors.New("empty path")
}
if strings.HasPrefix(path, "~/") || path == "~" {
home, err := os.UserHomeDir()
if err != nil {
return "", fmt.Errorf("home dir: %w", err)
}
if path == "~" {
path = home
} else {
path = filepath.Join(home, strings.TrimPrefix(path, "~/"))
}
}
return filepath.Clean(os.ExpandEnv(path)), nil
return filepath.Clean(os.ExpandEnv(crawlconfig.ExpandHome(path))), nil
}
func uniqueStrings(in []string) []string {

View File

@ -12,12 +12,13 @@ import (
"os"
"os/exec"
"path/filepath"
"slices"
"strconv"
"strings"
"time"
"github.com/openclaw/discrawl/internal/store"
"github.com/vincentkoc/crawlkit/mirror"
"github.com/vincentkoc/crawlkit/snapshot"
)
const (
@ -27,7 +28,7 @@ const (
directMessageGuildID = "@me"
)
var ErrNoManifest = errors.New("share manifest not found")
var ErrNoManifest = snapshot.ErrNoManifest
const shardFlushRows = 1024
@ -73,13 +74,7 @@ type Manifest struct {
Files map[string]string `json:"files,omitempty"`
}
type TableManifest struct {
Name string `json:"name"`
File string `json:"file,omitempty"`
Files []string `json:"files,omitempty"`
Columns []string `json:"columns"`
Rows int `json:"rows"`
}
type TableManifest = snapshot.TableManifest
type EmbeddingManifest struct {
Provider string `json:"provider"`
@ -94,120 +89,52 @@ func EnsureRepo(ctx context.Context, opts Options) error {
if strings.TrimSpace(opts.RepoPath) == "" {
return errors.New("share repo path is empty")
}
if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil {
return nil
}
if strings.TrimSpace(opts.Remote) != "" {
if err := os.MkdirAll(filepath.Dir(opts.RepoPath), 0o755); err != nil {
return fmt.Errorf("mkdir share parent: %w", err)
}
if err := run(ctx, "", "git", "clone", opts.Remote, opts.RepoPath); err != nil {
return err
}
if strings.TrimSpace(opts.Branch) != "" {
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil {
return err
}
}
return nil
}
if err := os.MkdirAll(opts.RepoPath, 0o755); err != nil {
return fmt.Errorf("mkdir share repo: %w", err)
}
if err := run(ctx, opts.RepoPath, "git", "init"); err != nil {
return err
}
if strings.TrimSpace(opts.Branch) != "" {
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil {
return err
}
}
return nil
return mirror.EnsureRepo(ctx, mirrorOptions(opts))
}
func Pull(ctx context.Context, opts Options) error {
if strings.TrimSpace(opts.Remote) == "" {
if strings.TrimSpace(opts.Remote) == "" && strings.TrimSpace(opts.RepoPath) == "" {
return nil
}
if err := EnsureRepo(ctx, opts); err != nil {
return err
}
if err := run(ctx, opts.RepoPath, "git", "fetch", "--prune", "origin"); err != nil {
return err
}
branch := opts.Branch
if strings.TrimSpace(branch) == "" {
branch = "main"
}
remoteRef := "refs/remotes/origin/" + branch
if _, err := output(ctx, opts.RepoPath, "git", "rev-parse", "--verify", remoteRef); err != nil {
return run(ctx, opts.RepoPath, "git", "checkout", "-B", branch)
}
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", branch, "origin/"+branch); err != nil {
return err
}
return run(ctx, opts.RepoPath, "git", "pull", "--ff-only", "origin", branch)
return mirror.Pull(ctx, mirrorOptions(opts))
}
func Commit(ctx context.Context, opts Options, message string) (bool, error) {
if err := run(ctx, opts.RepoPath, "git", "add", "."); err != nil {
return false, err
}
out, err := output(ctx, opts.RepoPath, "git", "status", "--porcelain")
if err != nil {
return false, err
}
if strings.TrimSpace(out) == "" {
return false, nil
}
if strings.TrimSpace(message) == "" {
message = "sync: discord archive"
}
if err := run(ctx, opts.RepoPath, "git", "commit", "-m", message); err != nil {
return false, err
}
return true, nil
return mirror.Commit(ctx, mirrorOptions(opts), message)
}
func Push(ctx context.Context, opts Options) error {
branch := opts.Branch
if strings.TrimSpace(branch) == "" {
branch = "main"
if err := mirror.Push(ctx, mirrorOptions(opts)); err != nil {
branch := opts.Branch
if strings.TrimSpace(branch) == "" {
branch = "main"
}
return fmt.Errorf("git push -u origin %s: %w", branch, err)
}
out, err := output(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch)
if err == nil {
return nil
}
if !isNonFastForwardPush(out) {
return fmt.Errorf("git push -u origin %s: %w\n%s", branch, err, strings.TrimSpace(out))
}
if pullErr := run(ctx, opts.RepoPath, "git", "pull", "--rebase", "--autostash", "origin", branch); pullErr != nil {
return fmt.Errorf("rebase before push retry: %w", pullErr)
}
return run(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch)
return nil
}
func Export(ctx context.Context, s *store.Store, opts Options) (Manifest, error) {
if err := EnsureRepo(ctx, opts); err != nil {
return Manifest{}, err
}
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "tables")); err != nil {
return Manifest{}, fmt.Errorf("reset tables dir: %w", err)
}
if err := os.MkdirAll(filepath.Join(opts.RepoPath, "tables"), 0o755); err != nil {
return Manifest{}, fmt.Errorf("mkdir tables dir: %w", err)
base, err := snapshot.Export(ctx, snapshot.ExportOptions{
DB: s.DB(),
RootDir: opts.RepoPath,
Tables: SnapshotTables,
MaxShardBytes: maxShardBytes,
Filter: func(table string, row map[string]any) (bool, error) {
return !isDirectMessageSnapshotRow(table, row), nil
},
})
if err != nil {
return Manifest{}, err
}
manifest := Manifest{
Version: 1,
GeneratedAt: time.Now().UTC(),
Files: map[string]string{"manifest": ManifestName},
}
for _, table := range SnapshotTables {
entry, err := exportTable(ctx, s.DB(), opts.RepoPath, table)
if err != nil {
return Manifest{}, err
}
manifest.Tables = append(manifest.Tables, entry)
Version: base.Version,
GeneratedAt: base.GeneratedAt,
Tables: base.Tables,
Files: base.Files,
}
if opts.IncludeEmbeddings {
entry, err := exportEmbeddings(ctx, s.DB(), opts)
@ -243,53 +170,51 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error)
_ = restorePragmas(ctx)
}
}()
tx, err := s.DB().BeginTx(ctx, nil)
if err != nil {
if _, err := snapshot.Import(ctx, snapshot.ImportOptions{
DB: s.DB(),
RootDir: opts.RepoPath,
DeleteTables: SnapshotTables,
Progress: func(progress snapshot.ImportProgress) {
opts.reportProgress(ImportProgress{
Phase: progress.Phase,
Table: progress.Table,
File: progress.File,
FileIndex: progress.FileIndex,
FileCount: progress.FileCount,
Rows: progress.Rows,
TotalRows: progress.TotalRows,
})
},
Filter: func(table string, row map[string]any) (bool, error) {
return !isDirectMessageSnapshotRow(table, row), nil
},
BeforeImport: func(ctx context.Context, tx *sql.Tx) error {
for _, table := range []string{"message_fts", "member_fts"} {
if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil {
return fmt.Errorf("drop %s: %w", table, err)
}
}
return nil
},
DeleteTable: func(ctx context.Context, tx *sql.Tx, table string) error {
query, args := snapshotDeleteQuery(table)
if _, err := tx.ExecContext(ctx, query, args...); err != nil {
return fmt.Errorf("clear %s: %w", table, err)
}
return nil
},
AfterImport: func(ctx context.Context, tx *sql.Tx) error {
if err := repairImportedGuildIDs(ctx, tx); err != nil {
return err
}
if opts.IncludeEmbeddings {
return importEmbeddings(ctx, tx, opts, manifest.Embeddings)
}
return nil
},
}); err != nil {
return Manifest{}, err
}
committed := false
defer func() {
if !committed {
_ = tx.Rollback()
}
}()
for _, table := range []string{"message_fts", "member_fts"} {
opts.reportProgress(ImportProgress{Phase: "drop_fts", Table: table})
if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil {
return Manifest{}, fmt.Errorf("drop %s: %w", table, err)
}
}
for _, table := range slices.Backward(SnapshotTables) {
opts.reportProgress(ImportProgress{Phase: "clear", Table: table})
query, args := snapshotDeleteQuery(table)
if _, err := tx.ExecContext(ctx, query, args...); err != nil {
return Manifest{}, fmt.Errorf("clear %s: %w", table, err)
}
}
for _, table := range manifest.Tables {
if err := ctx.Err(); err != nil {
return Manifest{}, err
}
opts.reportProgress(ImportProgress{Phase: "table_start", Table: table.Name, TotalRows: table.Rows})
if err := importTable(ctx, tx, opts, table); err != nil {
return Manifest{}, err
}
opts.reportProgress(ImportProgress{Phase: "table_done", Table: table.Name, TotalRows: table.Rows})
}
opts.reportProgress(ImportProgress{Phase: "repair"})
if err := repairImportedGuildIDs(ctx, tx); err != nil {
return Manifest{}, err
}
if opts.IncludeEmbeddings {
if err := importEmbeddings(ctx, tx, opts, manifest.Embeddings); err != nil {
return Manifest{}, err
}
}
opts.reportProgress(ImportProgress{Phase: "commit"})
if err := tx.Commit(); err != nil {
return Manifest{}, err
}
committed = true
opts.reportProgress(ImportProgress{Phase: "rebuild_fts"})
if err := s.RebuildSearchIndexes(ctx); err != nil {
return Manifest{}, err
@ -436,6 +361,10 @@ func ReadManifest(repoPath string) (Manifest, error) {
return manifest, nil
}
// mirrorOptions copies the repo path, remote, and branch of opts into a
// mirror.Options value.
func mirrorOptions(opts Options) mirror.Options {
	var out mirror.Options
	out.RepoPath = opts.RepoPath
	out.Remote = opts.Remote
	out.Branch = opts.Branch
	return out
}
func NeedsImport(ctx context.Context, s *store.Store, staleAfter time.Duration) bool {
if staleAfter <= 0 {
staleAfter = 15 * time.Minute

View File

@ -184,6 +184,26 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID)
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content")
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import")
manifest = appendSnapshotRow(t, repo, manifest, "messages", map[string]any{
"id": "hostile-dm",
"guild_id": directMessageGuildID,
"channel_id": "dm-c2",
"author_id": "u9",
"message_type": 0,
"created_at": "2026-04-24T16:00:00Z",
"content": "hostile imported dm",
"normalized_content": "hostile imported dm",
"pinned": 0,
"has_attachments": 0,
"raw_json": `{}`,
"updated_at": "2026-04-24T16:00:00Z",
})
manifest = appendSnapshotRow(t, repo, manifest, "sync_state", map[string]any{
"scope": "wiretap:hostile",
"cursor": "private",
"updated_at": "2026-04-24T16:00:00Z",
})
writeShareManifest(t, repo, manifest)
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
require.NoError(t, err)
@ -202,6 +222,12 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import")
require.NoError(t, err)
require.Equal(t, "2026-04-24T15:33:17Z", wiretapState)
hostileResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "hostile imported dm", Limit: 10})
require.NoError(t, err)
require.Empty(t, hostileResults)
_, rows, err := dst.ReadOnlyQuery(ctx, "select count(*) from sync_state where scope = 'wiretap:hostile'")
require.NoError(t, err)
require.Equal(t, "0", rows[0][0])
}
func TestExportImportEmbeddingsOptIn(t *testing.T) {
@ -800,6 +826,33 @@ func writeGzipJSONLines(t *testing.T, path string, lines []string) {
require.NoError(t, file.Close())
}
// appendSnapshotRow writes row as a one-line gzip JSONL shard named
// "hostile-<n>.jsonl.gz" under tables/<tableName> in repo, registers the shard
// on the matching manifest table entry, bumps that entry's row count, and
// returns the manifest. The test fails if tableName is not in the manifest.
func appendSnapshotRow(t *testing.T, repo string, manifest Manifest, tableName string, row map[string]any) Manifest {
	t.Helper()

	target := -1
	for idx, table := range manifest.Tables {
		if table.Name == tableName {
			target = idx
			break
		}
	}
	if target < 0 {
		t.Fatalf("table %s not found", tableName)
		return manifest
	}

	entry := &manifest.Tables[target]
	shardName := "hostile-" + strconv.Itoa(len(entry.Files)) + ".jsonl.gz"
	relPath := filepath.ToSlash(filepath.Join("tables", tableName, shardName))
	absPath := filepath.Join(repo, filepath.FromSlash(relPath))
	require.NoError(t, os.MkdirAll(filepath.Dir(absPath), 0o755))

	encoded, err := json.Marshal(row)
	require.NoError(t, err)
	writeGzipJSONLines(t, absPath, []string{string(encoded)})

	entry.Files = append(entry.Files, relPath)
	entry.Rows++
	return manifest
}
// writeShareManifest serializes manifest as indented JSON with a trailing
// newline and writes it to ManifestName inside repo with mode 0600.
func writeShareManifest(t *testing.T, repo string, manifest Manifest) {
	t.Helper()
	encoded, err := json.MarshalIndent(manifest, "", " ")
	require.NoError(t, err)
	encoded = append(encoded, '\n')
	target := filepath.Join(repo, ManifestName)
	require.NoError(t, os.WriteFile(target, encoded, 0o600))
}
func snapshotTableText(t *testing.T, repo string, table TableManifest) string {
t.Helper()
return snapshotFilesText(t, repo, table.Files)

View File

@ -92,6 +92,8 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) {
require.NoError(t, err)
require.Len(t, messages, 1)
require.Contains(t, messages[0].Content, "stack trace")
require.Equal(t, "trace.txt", messages[0].AttachmentNames)
require.Contains(t, messages[0].AttachmentText, "stack trace line one")
mentions, err := s.ListMentions(ctx, MentionListOptions{Target: "Shadow", Limit: 10})
require.NoError(t, err)
@ -116,3 +118,72 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) {
require.NoError(t, err)
require.Len(t, filtered, 1)
}
// TestListMessagesResolvesMentionNamesForDisplay checks that ListMessages
// leaves Content untouched while DisplayContent has Discord mention tokens
// replaced by names, both from recorded mention events (m1) and, when no
// events exist, from stored member and channel records (m2).
func TestListMessagesResolvesMentionNamesForDisplay(t *testing.T) {
t.Parallel()
ctx := context.Background()
s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
require.NoError(t, err)
defer func() { _ = s.Close() }()
// Channel c1 and member u4 are the only name sources available to m2.
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`}))
require.NoError(t, s.UpsertMember(ctx, MemberRecord{
GuildID: "g1",
UserID: "u4",
Username: "fallback",
DisplayName: "Fallback User",
RoleIDsJSON: `[]`,
RawJSON: `{}`,
}))
createdAt := time.Now().UTC().Format(time.RFC3339Nano)
// rawContent covers user, nickname-style user, role, and channel tokens.
rawContent := "ping <@u2> <@!u3> <@&r1> in <#c1>"
fallbackContent := "ask <@u4> in <#c1>"
require.NoError(t, s.UpsertMessages(ctx, []MessageMutation{
{
Record: MessageRecord{
ID: "m1",
GuildID: "g1",
ChannelID: "c1",
ChannelName: "maintainers",
AuthorID: "u1",
AuthorName: "Peter",
MessageType: 0,
CreatedAt: createdAt,
Content: rawContent,
NormalizedContent: rawContent,
RawJSON: `{}`,
},
// m1 carries one mention event per token in rawContent.
Mentions: []MentionEventRecord{
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u2", TargetName: "Shadow", EventAt: createdAt},
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u3", TargetName: "Vincent", EventAt: createdAt},
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "role", TargetID: "r1", TargetName: "Maintainers", EventAt: createdAt},
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "channel", TargetID: "c1", TargetName: "maintainers", EventAt: createdAt},
},
},
{
// m2 has no mention events, so its tokens must be resolved inline.
Record: MessageRecord{
ID: "m2",
GuildID: "g1",
ChannelID: "c1",
ChannelName: "maintainers",
AuthorID: "u1",
AuthorName: "Peter",
MessageType: 0,
CreatedAt: createdAt,
Content: fallbackContent,
NormalizedContent: fallbackContent,
RawJSON: `{}`,
},
},
}))
messages, err := s.ListMessages(ctx, MessageListOptions{Channel: "maintainers", Limit: 10})
require.NoError(t, err)
require.Len(t, messages, 2)
// NOTE(review): both messages share createdAt, so the m1-before-m2 order
// asserted below relies on ListMessages' tie-break — confirm it is by id.
require.Equal(t, rawContent, messages[0].Content)
require.Equal(t, "ping @Shadow @Vincent @Maintainers in #maintainers", messages[0].DisplayContent)
require.Equal(t, fallbackContent, messages[1].Content)
require.Equal(t, "ask @Fallback User in #maintainers", messages[1].DisplayContent)
}

View File

@ -2,10 +2,16 @@ package store
import (
"context"
"regexp"
"strings"
"time"
)
var (
// discordUserMentionRE matches user mention tokens of the form <@ID> or
// <@!ID>, where ID is one or more ASCII letters or digits; the ID is the
// first capture group. Role tokens (<@&ID>) do not match.
discordUserMentionRE = regexp.MustCompile(`<@!?([A-Za-z0-9]+)>`)
// discordChannelMentionRE matches channel mention tokens of the form <#ID>
// with the same ID alphabet; the ID is the first capture group.
discordChannelMentionRE = regexp.MustCompile(`<#([A-Za-z0-9]+)>`)
)
type MessageListOptions struct {
GuildIDs []string
Channel string
@ -29,17 +35,22 @@ type MentionListOptions struct {
}
type MessageRow struct {
MessageID string `json:"message_id"`
GuildID string `json:"guild_id"`
ChannelID string `json:"channel_id"`
ChannelName string `json:"channel_name"`
AuthorID string `json:"author_id"`
AuthorName string `json:"author_name"`
Content string `json:"content"`
CreatedAt time.Time `json:"created_at"`
ReplyToMessage string `json:"reply_to_message_id,omitempty"`
HasAttachments bool `json:"has_attachments"`
Pinned bool `json:"pinned"`
MessageID string `json:"message_id"`
GuildID string `json:"guild_id"`
GuildName string `json:"guild_name,omitempty"`
ChannelID string `json:"channel_id"`
ChannelName string `json:"channel_name"`
AuthorID string `json:"author_id"`
AuthorName string `json:"author_name"`
Content string `json:"content"`
DisplayContent string `json:"display_content,omitempty"`
CreatedAt time.Time `json:"created_at"`
ReplyToMessage string `json:"reply_to_message_id,omitempty"`
Source string `json:"source,omitempty"`
HasAttachments bool `json:"has_attachments"`
AttachmentNames string `json:"attachment_names,omitempty"`
AttachmentText string `json:"attachment_text,omitempty"`
Pinned bool `json:"pinned"`
}
func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) {
@ -75,6 +86,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
select
m.id,
m.guild_id,
coalesce(g.name, ''),
m.channel_id,
coalesce(c.name, ''),
coalesce(m.author_id, ''),
@ -93,9 +105,13 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
end,
m.created_at,
coalesce(m.reply_to_message_id, ''),
coalesce(json_extract(m.raw_json, '$.source'), ''),
m.has_attachments,
coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''),
coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''),
m.pinned
from messages m
left join guilds g on g.id = m.guild_id
left join channels c on c.id = m.channel_id
left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id
where ` + strings.Join(clauses, " and ") + `
@ -139,6 +155,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
if err := rows.Scan(
&row.MessageID,
&row.GuildID,
&row.GuildName,
&row.ChannelID,
&row.ChannelName,
&row.AuthorID,
@ -146,7 +163,10 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
&row.Content,
&created,
&row.ReplyToMessage,
&row.Source,
&hasAttachments,
&row.AttachmentNames,
&row.AttachmentText,
&pinned,
); err != nil {
return nil, err
@ -154,11 +174,364 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
row.CreatedAt = parseTime(created)
row.HasAttachments = hasAttachments == 1
row.Pinned = pinned == 1
row.DisplayContent = row.Content
out = append(out, row)
}
return out, rows.Err()
if err := rows.Err(); err != nil {
return nil, err
}
return out, s.resolveMessageDisplayMentions(ctx, out)
}
// ListMessagesWithThreadContext lists messages like ListMessages and then
// passes the result through hydrateMessageThreadContext so that reply targets
// missing from the listing are added. The sum of opts.Limit and opts.Last is
// forwarded as the hydration limit.
func (s *Store) ListMessagesWithThreadContext(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) {
	listed, listErr := s.ListMessages(ctx, opts)
	if listErr != nil {
		return nil, listErr
	}
	budget := opts.Limit + opts.Last
	return s.hydrateMessageThreadContext(ctx, listed, budget)
}
// hydrateMessageThreadContext appends the messages that rows reply to when
// those messages are not already part of rows.
//
// Only direct reply targets are fetched (one query, by message id); the
// fetched rows get their display mentions resolved and are merged after the
// original rows via mergeMessageRows. rows is returned as-is when it is empty
// or when every reply target is already present. The limit parameter is
// accepted but not consulted by this implementation.
func (s *Store) hydrateMessageThreadContext(ctx context.Context, rows []MessageRow, limit int) ([]MessageRow, error) {
	if len(rows) == 0 {
		return rows, nil
	}

	// Message ids that are already part of the listing.
	loaded := map[string]struct{}{}
	for i := range rows {
		if id := strings.TrimSpace(rows[i].MessageID); id != "" {
			loaded[id] = struct{}{}
		}
	}

	// Distinct reply targets that still need to be fetched, in first-seen order.
	queued := map[string]struct{}{}
	missing := make([]any, 0, len(rows))
	for i := range rows {
		parent := strings.TrimSpace(rows[i].ReplyToMessage)
		if parent == "" {
			continue
		}
		_, isLoaded := loaded[parent]
		_, isQueued := queued[parent]
		if isLoaded || isQueued {
			continue
		}
		queued[parent] = struct{}{}
		missing = append(missing, parent)
	}
	if len(missing) == 0 {
		return rows, nil
	}

	// Column order must match the Scan destinations in scanMessageRows.
	query := `
select
m.id,
m.guild_id,
coalesce(g.name, ''),
m.channel_id,
coalesce(c.name, ''),
coalesce(m.author_id, ''),
coalesce(
nullif(mem.display_name, ''),
nullif(mem.nick, ''),
nullif(mem.global_name, ''),
nullif(mem.username, ''),
nullif(json_extract(m.raw_json, '$.author.global_name'), ''),
nullif(json_extract(m.raw_json, '$.author.username'), ''),
''
),
case
when trim(coalesce(m.content, '')) <> '' then m.content
else m.normalized_content
end,
m.created_at,
coalesce(m.reply_to_message_id, ''),
coalesce(json_extract(m.raw_json, '$.source'), ''),
m.has_attachments,
coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''),
coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''),
m.pinned
from messages m
left join guilds g on g.id = m.guild_id
left join channels c on c.id = m.channel_id
left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id
where m.id in (` + placeholders(len(missing)) + `)
order by m.created_at asc, m.id asc`

	result, err := s.db.QueryContext(ctx, query, missing...)
	if err != nil {
		return nil, err
	}
	defer func() { _ = result.Close() }()

	parents, err := scanMessageRows(result)
	if err != nil {
		return nil, err
	}
	if err := s.resolveMessageDisplayMentions(ctx, parents); err != nil {
		return nil, err
	}
	return mergeMessageRows(rows, parents), nil
}
// scanMessageRows drains rows into MessageRow values.
//
// Each record is expected to supply fifteen columns in the order of the Scan
// destinations below. The created-at text goes through parseTime, the two
// integer flags are true only when equal to 1, and DisplayContent starts out
// as a copy of Content. The first scan or iteration error aborts the scan and
// discards all rows collected so far.
func scanMessageRows(rows rowScanner) ([]MessageRow, error) {
	var scanned []MessageRow
	for rows.Next() {
		var msg MessageRow
		var createdRaw string
		var attachmentFlag, pinnedFlag int
		dest := []any{
			&msg.MessageID,
			&msg.GuildID,
			&msg.GuildName,
			&msg.ChannelID,
			&msg.ChannelName,
			&msg.AuthorID,
			&msg.AuthorName,
			&msg.Content,
			&createdRaw,
			&msg.ReplyToMessage,
			&msg.Source,
			&attachmentFlag,
			&msg.AttachmentNames,
			&msg.AttachmentText,
			&pinnedFlag,
		}
		if err := rows.Scan(dest...); err != nil {
			return nil, err
		}
		msg.CreatedAt = parseTime(createdRaw)
		msg.HasAttachments = attachmentFlag == 1
		msg.Pinned = pinnedFlag == 1
		msg.DisplayContent = msg.Content
		scanned = append(scanned, msg)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return scanned, nil
}
// rowScanner is the cursor surface scanMessageRows needs: advance, read the
// current record, and report an iteration error. The result of
// s.db.QueryContext is passed to it in hydrateMessageThreadContext.
type rowScanner interface {
// Next advances to the next record and reports whether one is available.
Next() bool
// Scan copies the current record's columns into dest.
Scan(dest ...any) error
// Err returns the error, if any, encountered during iteration.
Err() error
}
// mergeMessageRows concatenates primary and extra, keeping the first
// occurrence of each (guild id, channel id, message id) triple and preserving
// input order.
func mergeMessageRows(primary, extra []MessageRow) []MessageRow {
	merged := make([]MessageRow, 0, len(primary)+len(extra))
	known := map[string]struct{}{}
	for _, batch := range [][]MessageRow{primary, extra} {
		for _, row := range batch {
			// NUL-separated so ids cannot run together into a colliding key.
			identity := strings.Join([]string{row.GuildID, row.ChannelID, row.MessageID}, "\x00")
			if _, dup := known[identity]; dup {
				continue
			}
			known[identity] = struct{}{}
			merged = append(merged, row)
		}
	}
	return merged
}
// normalizeChannelFilter trims surrounding whitespace from raw, drops a single
// leading "#", and trims again.
func normalizeChannelFilter(raw string) string {
	name := strings.TrimSpace(raw)
	name = strings.TrimPrefix(name, "#")
	return strings.TrimSpace(name)
}
// resolveMessageDisplayMentions rewrites DisplayContent of rows in place so
// that mention tokens show names.
//
// It first applies every mention_events record stored for the listed message
// ids through replaceDiscordMention, then hands the rows to
// resolveInlineDiscordMentions for tokens that are still unresolved. Rows
// with a blank message id are skipped in the first step; when no row has an
// id the function returns without querying.
func (s *Store) resolveMessageDisplayMentions(ctx context.Context, rows []MessageRow) error {
	if len(rows) == 0 {
		return nil
	}

	positions := make(map[string]int, len(rows))
	args := make([]any, 0, len(rows))
	for i := range rows {
		id := strings.TrimSpace(rows[i].MessageID)
		if id == "" {
			continue
		}
		args = append(args, id)
		// A repeated id keeps the position of its last occurrence.
		positions[id] = i
	}
	if len(args) == 0 {
		return nil
	}

	query := `select message_id, target_type, target_id, target_name from mention_events where message_id in (` + placeholders(len(args)) + `)`
	events, err := s.db.QueryContext(ctx, query, args...)
	if err != nil {
		return err
	}
	defer func() { _ = events.Close() }()

	for events.Next() {
		var event struct {
			messageID, kind, targetID, targetName string
		}
		if err := events.Scan(&event.messageID, &event.kind, &event.targetID, &event.targetName); err != nil {
			return err
		}
		pos, found := positions[event.messageID]
		if !found {
			continue
		}
		target := &rows[pos]
		target.DisplayContent = replaceDiscordMention(target.DisplayContent, event.kind, event.targetID, event.targetName)
	}
	if err := events.Err(); err != nil {
		return err
	}
	return s.resolveInlineDiscordMentions(ctx, rows)
}
// replaceDiscordMention substitutes a readable label for every occurrence of
// one mention target inside content.
//
// targetType "role" rewrites <@&ID> to @label and "channel" rewrites <#ID> to
// #label; any other type is treated as a user and rewrites both <@ID> and
// <@!ID> to @label. The label is the trimmed targetName, or the trimmed id
// when the name is blank. content is returned untouched when the id is blank.
func replaceDiscordMention(content, targetType, targetID, targetName string) string {
	id := strings.TrimSpace(targetID)
	if id == "" {
		return content
	}
	label := strings.TrimSpace(targetName)
	if label == "" {
		label = id
	}

	kind := strings.TrimSpace(targetType)
	if kind == "role" {
		return strings.ReplaceAll(content, "<@&"+id+">", "@"+label)
	}
	if kind == "channel" {
		return strings.ReplaceAll(content, "<#"+id+">", "#"+label)
	}
	// Plain form first, then the "!" form, applied one after the other.
	for _, token := range []string{"<@" + id + ">", "<@!" + id + ">"} {
		content = strings.ReplaceAll(content, token, "@"+label)
	}
	return content
}
// resolveInlineDiscordMentions replaces user and channel mention tokens that
// are still present in DisplayContent with names looked up from the members
// and channels tables.
//
// Ids are gathered from all rows first so each table is queried once. Lookup
// per token goes through firstResolvedDiscordName with the row's trimmed
// guild id; tokens without a resolved name are left as they are.
func (s *Store) resolveInlineDiscordMentions(ctx context.Context, rows []MessageRow) error {
	// collect records the captured id of every match of re in text.
	collect := func(re *regexp.Regexp, text string, into map[string]struct{}) {
		for _, groups := range re.FindAllStringSubmatch(text, -1) {
			if len(groups) < 2 || strings.TrimSpace(groups[1]) == "" {
				continue
			}
			into[groups[1]] = struct{}{}
		}
	}

	userIDs := map[string]struct{}{}
	channelIDs := map[string]struct{}{}
	for i := range rows {
		collect(discordUserMentionRE, rows[i].DisplayContent, userIDs)
		collect(discordChannelMentionRE, rows[i].DisplayContent, channelIDs)
	}

	userNames, err := s.discordMemberDisplayNames(ctx, userIDs)
	if err != nil {
		return err
	}
	channelNames, err := s.discordChannelNames(ctx, channelIDs)
	if err != nil {
		return err
	}

	// substitute swaps each match of re for sigil+name when a name is known.
	substitute := func(re *regexp.Regexp, names map[string]string, guildID, sigil, text string) string {
		return re.ReplaceAllStringFunc(text, func(token string) string {
			groups := re.FindStringSubmatch(token)
			if len(groups) < 2 {
				return token
			}
			name := firstResolvedDiscordName(names, guildID, groups[1])
			if name == "" {
				return token
			}
			return sigil + name
		})
	}

	for i := range rows {
		guildID := strings.TrimSpace(rows[i].GuildID)
		text := substitute(discordUserMentionRE, userNames, guildID, "@", rows[i].DisplayContent)
		rows[i].DisplayContent = substitute(discordChannelMentionRE, channelNames, guildID, "#", text)
	}
	return nil
}
// discordMemberDisplayNames looks up a display label for every member row
// whose user_id is in ids. The label is the first non-empty value among
// display_name, nick, global_name and username.
//
// The result is keyed the way rememberResolvedDiscordName stores entries
// ("<guild>|<user>" plus a guild-independent "|<user>" entry). An empty ids
// set yields a nil map and no query is issued.
func (s *Store) discordMemberDisplayNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) {
	if len(ids) == 0 {
		return nil, nil
	}

	params := mapKeysAsAny(ids)
	const selectHead = `
select guild_id, user_id,
coalesce(
nullif(display_name, ''),
nullif(nick, ''),
nullif(global_name, ''),
nullif(username, ''),
''
)
from members
where user_id in (`
	const selectTail = `)
`
	cursor, err := s.db.QueryContext(ctx, selectHead+placeholders(len(params))+selectTail, params...)
	if err != nil {
		return nil, err
	}
	defer func() { _ = cursor.Close() }()

	names := make(map[string]string)
	for cursor.Next() {
		var (
			guildID     string
			userID      string
			displayName string
		)
		if scanErr := cursor.Scan(&guildID, &userID, &displayName); scanErr != nil {
			return nil, scanErr
		}
		rememberResolvedDiscordName(names, guildID, userID, displayName)
	}
	return names, cursor.Err()
}
// discordChannelNames looks up the name of every channel whose id is in ids.
//
// The result is keyed the way rememberResolvedDiscordName stores entries
// ("<guild>|<channel>" plus a guild-independent "|<channel>" entry); channels
// with an empty name are not recorded. An empty ids set yields a nil map and
// no query is issued.
func (s *Store) discordChannelNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) {
	if len(ids) == 0 {
		return nil, nil
	}

	params := mapKeysAsAny(ids)
	const selectHead = `select guild_id, id, coalesce(nullif(name, ''), '') from channels where id in (`
	const selectTail = `)`
	cursor, err := s.db.QueryContext(ctx, selectHead+placeholders(len(params))+selectTail, params...)
	if err != nil {
		return nil, err
	}
	defer func() { _ = cursor.Close() }()

	names := make(map[string]string)
	for cursor.Next() {
		var (
			guildID     string
			channelID   string
			channelName string
		)
		if scanErr := cursor.Scan(&guildID, &channelID, &channelName); scanErr != nil {
			return nil, scanErr
		}
		rememberResolvedDiscordName(names, guildID, channelID, channelName)
	}
	return names, cursor.Err()
}
// mapKeysAsAny copies the keys of a string set into a []any so they can be
// passed as variadic query arguments. The order of the returned keys follows
// Go map iteration and is therefore unspecified. A nil or empty set yields an
// empty, non-nil slice.
func mapKeysAsAny(values map[string]struct{}) []any {
	keys := make([]any, len(values))
	next := 0
	for key := range values {
		keys[next] = key
		next++
	}
	return keys
}
// rememberResolvedDiscordName records name for id in out under two keys:
//
//   - "<guildID>|<id>" when guildID is non-blank; a later call for the same
//     guild and id overwrites the earlier value.
//   - "|<id>" as a guild-independent fallback; only the first name seen for an
//     id is kept.
//
// All three inputs are whitespace-trimmed first. Nothing is stored when the
// trimmed id or name is empty. out must be non-nil.
func rememberResolvedDiscordName(out map[string]string, guildID, id, name string) {
	key, label := strings.TrimSpace(id), strings.TrimSpace(name)
	if key == "" || label == "" {
		return
	}

	if scope := strings.TrimSpace(guildID); scope != "" {
		out[scope+"|"+key] = label
	}

	fallbackKey := "|" + key
	if _, exists := out[fallbackKey]; exists {
		return
	}
	out[fallbackKey] = label
}
// firstResolvedDiscordName returns the stored name for id, preferring the
// guild-scoped entry "<guildID>|<id>" and falling back to the
// guild-independent entry "|<id>". Inputs and stored values are
// whitespace-trimmed; a blank guild-scoped value counts as missing. It returns
// "" when id is blank or no usable entry exists. A nil map is safe to pass.
func firstResolvedDiscordName(values map[string]string, guildID, id string) string {
	key := strings.TrimSpace(id)
	if key == "" {
		return ""
	}

	scoped := ""
	if scope := strings.TrimSpace(guildID); scope != "" {
		scoped = strings.TrimSpace(values[scope+"|"+key])
	}
	if scoped != "" {
		return scoped
	}
	return strings.TrimSpace(values["|"+key])
}

View File

@ -5,13 +5,10 @@ import (
"database/sql"
"fmt"
"hash/fnv"
"os"
"path/filepath"
"runtime"
"strconv"
"time"
_ "modernc.org/sqlite"
crawlstore "github.com/vincentkoc/crawlkit/store"
)
const (
@ -114,66 +111,33 @@ type ChannelRow struct {
}
func Open(ctx context.Context, path string) (*Store, error) {
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return nil, fmt.Errorf("mkdir db dir: %w", err)
}
if err := ensureDBFile(path); err != nil {
return nil, err
}
dsn := fmt.Sprintf(
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
path,
)
db, err := sql.Open("sqlite", dsn)
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
if err != nil {
return nil, fmt.Errorf("open sqlite: %w", err)
}
// SQLite is single-writer; keep one shared connection so concurrent callers queue
// instead of contending on separate writer connections.
db.SetMaxOpenConns(1)
db.SetMaxIdleConns(1)
if err := db.PingContext(ctx); err != nil {
_ = db.Close()
return nil, fmt.Errorf("ping sqlite: %w", err)
}
if err := tightenDBFilePerms(path); err != nil {
_ = db.Close()
return nil, err
}
db := base.DB()
store := &Store{db: db, path: path}
if err := store.migrate(ctx); err != nil {
_ = db.Close()
_ = base.Close()
return nil, err
}
return store, nil
}
func ensureDBFile(path string) error {
if _, err := os.Stat(path); err == nil {
return nil
} else if !os.IsNotExist(err) {
return fmt.Errorf("stat db file: %w", err)
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
base, err := crawlstore.OpenReadOnly(ctx, path)
if err != nil {
return nil, err
}
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
if err != nil && !os.IsExist(err) {
return fmt.Errorf("create db file: %w", err)
store := &Store{db: base.DB(), path: path}
if version, err := store.schemaVersion(ctx); err != nil {
_ = base.Close()
return nil, err
} else if version != storeSchemaVersion {
_ = base.Close()
return nil, fmt.Errorf("database schema version mismatch: got %d want %d", version, storeSchemaVersion)
}
if file != nil {
if closeErr := file.Close(); closeErr != nil {
return fmt.Errorf("close db file: %w", closeErr)
}
}
return nil
}
func tightenDBFilePerms(path string) error {
if runtime.GOOS == "windows" {
return nil
}
if err := os.Chmod(path, 0o600); err != nil {
return fmt.Errorf("chmod db file: %w", err)
}
return nil
return store, nil
}
func (s *Store) Close() error {

View File

@ -149,8 +149,9 @@ func TestStoreMaintenanceHelpers(t *testing.T) {
require.NoError(t, err)
defer func() { _ = s.Close() }()
require.NoError(t, ensureDBFile(dbPath))
require.NoError(t, tightenDBFilePerms(dbPath))
info, err := os.Stat(dbPath)
require.NoError(t, err)
require.Equal(t, os.FileMode(0o600), info.Mode().Perm())
require.NoError(t, s.RebuildSearchIndexes(ctx))
version, err := s.schemaVersion(ctx)
require.NoError(t, err)
@ -1519,6 +1520,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`}))
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c2", GuildID: "g1", Kind: "text", Name: "random", RawJSON: `{}`}))
require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}))
require.NoError(t, s.UpsertMember(ctx, MemberRecord{
GuildID: "g1",
UserID: "u1",
@ -1625,6 +1627,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
require.Len(t, rows, 1)
require.Equal(t, "m4", rows[0].MessageID)
require.Equal(t, "fallback-user", rows[0].AuthorName)
require.Equal(t, "Guild", rows[0].GuildName)
require.True(t, rows[0].Pinned)
require.True(t, rows[0].HasAttachments)
@ -1666,6 +1669,49 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
require.Equal(t, "m4", rows[1].MessageID)
}
// TestListMessagesWithThreadContextHydratesReplyRoot stores a root message and
// a later reply that references it, asks for only the last message, and
// expects ListMessagesWithThreadContext to return the reply followed by the
// root it replies to.
func TestListMessagesWithThreadContextHydratesReplyRoot(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	dbPath := filepath.Join(t.TempDir(), "discrawl.db")
	s, err := Open(ctx, dbPath)
	require.NoError(t, err)
	defer func() { _ = s.Close() }()

	require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}))
	require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))

	// Fixture messages, inserted in order: the root first, then its reply.
	fixtures := []MessageRecord{
		{
			ID:                "root",
			GuildID:           "g1",
			ChannelID:         "c1",
			ChannelName:       "general",
			AuthorID:          "u1",
			MessageType:       0,
			CreatedAt:         "2026-03-01T10:00:00Z",
			Content:           "root message",
			NormalizedContent: "root message",
			RawJSON:           `{}`,
		},
		{
			ID:                "reply",
			GuildID:           "g1",
			ChannelID:         "c1",
			ChannelName:       "general",
			AuthorID:          "u2",
			MessageType:       0,
			CreatedAt:         "2026-03-02T10:00:00Z",
			Content:           "reply message",
			NormalizedContent: "reply message",
			ReplyToMessageID:  "root",
			RawJSON:           `{}`,
		},
	}
	for _, record := range fixtures {
		require.NoError(t, s.UpsertMessage(ctx, record))
	}

	rows, err := s.ListMessagesWithThreadContext(ctx, MessageListOptions{Last: 1})
	require.NoError(t, err)
	require.Len(t, rows, 2)
	for position, wantID := range []string{"reply", "root"} {
		require.Equal(t, wantID, rows[position].MessageID)
	}
}
func TestNormalizeFTSQueryEdgeCases(t *testing.T) {
t.Parallel()

View File

@ -7,6 +7,8 @@ import (
"time"
"github.com/bwmarrin/discordgo"
"github.com/vincentkoc/crawlkit/progress"
"github.com/openclaw/discrawl/internal/store"
)
@ -663,6 +665,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou
totalChannels := p.totalChannels
messages := p.messages
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
percent := progress.Percent(int64(processed), int64(totalChannels))
completion := progress.Completion(int64(processed), int64(totalChannels))
p.mu.Unlock()
p.syncer.logger.Info(
"message sync progress",
@ -670,6 +674,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou
"processed_channels", processed,
"total_channels", totalChannels,
"remaining_channels", totalChannels-processed,
"percent", percent,
"completion", completion,
"active_channels", activeChannels,
"messages_written", messages,
"deferred_channels", deferred,
@ -698,6 +704,8 @@ func (p *messageSyncProgress) finish(err error) {
totalChannels := p.totalChannels
messages := p.messages
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
percent := progress.Percent(int64(processed), int64(totalChannels))
completion := progress.Completion(int64(processed), int64(totalChannels))
oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now)
p.mu.Unlock()
attrs := []any{
@ -705,6 +713,8 @@ func (p *messageSyncProgress) finish(err error) {
"processed_channels", processed,
"total_channels", totalChannels,
"remaining_channels", totalChannels - processed,
"percent", percent,
"completion", completion,
"active_channels", activeChannels,
"messages_written", messages,
"deferred_channels", deferred,
@ -766,6 +776,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() {
messages := p.messages
idleFor := now.Sub(p.lastProgressAt).Round(time.Second).String()
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
percent := progress.Percent(int64(processed), int64(totalChannels))
completion := progress.Completion(int64(processed), int64(totalChannels))
oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now)
p.mu.Unlock()
p.syncer.logger.Info(
@ -774,6 +786,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() {
"processed_channels", processed,
"total_channels", totalChannels,
"remaining_channels", totalChannels-processed,
"percent", percent,
"completion", completion,
"active_channels", activeChannels,
"messages_written", messages,
"deferred_channels", deferred,

View File

@ -73,6 +73,8 @@ func TestMessageSyncProgressFinishReportsSummaryCounts(t *testing.T) {
logs := out.String()
require.Contains(t, logs, `msg="message sync finished"`)
require.Contains(t, logs, `processed_channels=3`)
require.Contains(t, logs, `percent=100.0`)
require.Contains(t, logs, `completion=100.0%`)
require.Contains(t, logs, `messages_written=42`)
require.Contains(t, logs, `skipped_missing_access_channels=1`)
require.Contains(t, logs, `skipped_unknown_channel_channels=1`)
@ -105,4 +107,6 @@ func TestMessageSyncProgressReportsWaitingHeartbeat(t *testing.T) {
require.Contains(t, logs, `oldest_active_channel_id=c1`)
require.Contains(t, logs, `oldest_active_channel_name=slowpoke`)
require.Contains(t, logs, `active_channels=1`)
require.Contains(t, logs, `percent=0.0`)
require.Contains(t, logs, `completion=0.0%`)
}