merge: use crawlkit infrastructure
* feat/use-crawlkit: (50 commits) fix(release): update version ldflag module path chore(deps): use crawlkit v0.4.0 fix(tui): hydrate discord roots without thread scans fix(tui): limit discord thread hydration fix(tui): hydrate discord reply context fix(share): forward snapshot import progress fix(tui): browse newest discord messages fix(tui): show discord attachment details feat(tui): refresh discord archive rows fix(tui): resolve discord inline mentions fix(tui): render discord mention names docs: note shared tui polish fix(tui): document shared controls fix(tui): expose discord message details fix(tui): add Discord message URLs docs(tui): note dm pane labels fix(tui): label discord direct message panes fix(tui): use compact-pane crawlkit fix(tui): pick up shared detail renderer fix(sync): include progress percentages ...
This commit is contained in:
commit
ebb41dabfd
14
.github/workflows/ci.yml
vendored
14
.github/workflows/ci.yml
vendored
@ -91,7 +91,19 @@ jobs:
|
||||
}'
|
||||
|
||||
- name: Build
|
||||
run: go build ./cmd/discrawl
|
||||
run: go build -o bin/discrawl ./cmd/discrawl
|
||||
|
||||
- name: Smoke test CLI control surface
|
||||
run: |
|
||||
set -euo pipefail
|
||||
output="$(./bin/discrawl help)"
|
||||
printf '%s\n' "$output"
|
||||
printf '%s' "$output" | grep -q "metadata"
|
||||
printf '%s' "$output" | grep -q "tui"
|
||||
test -n "$(./bin/discrawl --version)"
|
||||
./bin/discrawl metadata --json | grep -q '"schema_version"'
|
||||
./bin/discrawl status --json | grep -q '"databases"'
|
||||
./bin/discrawl tui --json | grep -q '^\['
|
||||
|
||||
deps:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@ -12,7 +12,7 @@ builds:
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ldflags:
|
||||
- -s -w -X github.com/steipete/discrawl/internal/cli.version={{ .Version }}
|
||||
- -s -w -X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}
|
||||
targets:
|
||||
- darwin_amd64
|
||||
- darwin_arm64
|
||||
|
||||
14
CHANGELOG.md
14
CHANGELOG.md
@ -26,10 +26,24 @@
|
||||
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
|
||||
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Label direct-message TUI panes as direct messages instead of raw `@me` guild rows, keeping DM channel/person context readable.
|
||||
- Inherit shared crawlkit TUI improvements for newest-first startup, count-header sorting, selected-message-first chat detail panes, and gitcrawl-style metadata labels.
|
||||
- Surface Discord attachment filenames and extracted text in TUI detail panes instead of only showing `attachments=true`.
|
||||
|
||||
## 0.6.3 - 2026-05-01
|
||||
|
||||
### Changes
|
||||
|
||||
- Add crawlkit control metadata/status surfaces with `metadata --json`, `status --json`, and `doctor --json`.
|
||||
- Add `tap` and `cache-import` as public desktop-cache import names while keeping `wiretap` as a documented legacy alias.
|
||||
- Add `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared `crawlkit/tui` package.
|
||||
- Render TUI rows with compact panes and expose pinned, attachment, reply, channel, and author metadata in the detail pane.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Keep status and TUI reads safe for fresh or missing local databases without triggering git-share auto-update.
|
||||
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
|
||||
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.
|
||||
|
||||
|
||||
13
README.md
13
README.md
@ -22,6 +22,7 @@ Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror.
|
||||
- tails Gateway events for live updates, with periodic repair syncs
|
||||
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
|
||||
- publishes and imports private Git-backed archive snapshots for org-wide read access
|
||||
- browses stored messages and local DMs in a terminal archive UI
|
||||
- supports Git-only read mode with no Discord credentials on reader machines
|
||||
- generates backup README activity reports, with optional AI-written field notes
|
||||
- exposes read-only SQL for ad hoc analysis
|
||||
@ -159,6 +160,17 @@ discrawl messages --channel general --hours 24
|
||||
|
||||
## Commands
|
||||
|
||||
### `tui`
|
||||
|
||||
Opens the local terminal archive browser for stored messages.
|
||||
|
||||
```bash
|
||||
discrawl tui
|
||||
discrawl tui --guild 123456789012345678 --channel general
|
||||
discrawl tui --dm
|
||||
discrawl --json tui --limit 50
|
||||
```
|
||||
|
||||
### `init`
|
||||
|
||||
Creates the local config and discovers accessible guilds.
|
||||
@ -683,6 +695,7 @@ go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
|
||||
go test ./... -coverprofile=/tmp/discrawl.cover
|
||||
go tool cover -func=/tmp/discrawl.cover | tail -n 1
|
||||
go build ./cmd/discrawl
|
||||
go run ./cmd/discrawl help | grep tui
|
||||
```
|
||||
|
||||
Target coverage is `>= 85%`.
|
||||
|
||||
28
go.mod
28
go.mod
@ -5,26 +5,50 @@ go 1.26.2
|
||||
require (
|
||||
github.com/bwmarrin/discordgo v0.29.0
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/pelletier/go-toml/v2 v2.3.1
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/zalando/go-keyring v0.2.8
|
||||
golang.org/x/sys v0.43.0
|
||||
golang.org/x/text v0.36.0
|
||||
modernc.org/sqlite v1.50.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/charmbracelet/bubbles v1.0.0 // indirect
|
||||
github.com/clipperhouse/displaywidth v0.9.0 // indirect
|
||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
||||
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
|
||||
modernc.org/sqlite v1.50.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
|
||||
github.com/charmbracelet/bubbletea v1.3.10 // indirect
|
||||
github.com/charmbracelet/colorprofile v0.4.1 // indirect
|
||||
github.com/charmbracelet/lipgloss v1.1.0 // indirect
|
||||
github.com/charmbracelet/x/ansi v0.11.6 // indirect
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
|
||||
github.com/charmbracelet/x/term v0.2.2 // indirect
|
||||
github.com/danieljoos/wincred v1.2.3 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
|
||||
github.com/godbus/dbus/v5 v5.2.2 // indirect
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.22 // indirect
|
||||
github.com/mattn/go-localereader v0.0.1 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.19 // indirect
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/vincentkoc/crawlkit v0.4.0
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
golang.org/x/crypto v0.50.0 // indirect
|
||||
golang.org/x/tools v0.44.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
|
||||
45
go.sum
45
go.sum
@ -1,5 +1,27 @@
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
|
||||
github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
|
||||
github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
|
||||
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
|
||||
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
|
||||
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
|
||||
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
|
||||
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
|
||||
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
|
||||
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
|
||||
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
|
||||
github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
|
||||
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
|
||||
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
|
||||
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
|
||||
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
|
||||
github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
|
||||
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
|
||||
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
|
||||
github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
|
||||
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
|
||||
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
|
||||
@ -7,6 +29,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
|
||||
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
|
||||
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 h1:EwtI+Al+DeppwYX2oXJCETMO23COyaKGP6fHVpkpWpg=
|
||||
@ -25,8 +49,20 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
|
||||
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
||||
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
|
||||
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
|
||||
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
||||
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
||||
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
|
||||
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
||||
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
||||
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
|
||||
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
|
||||
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
|
||||
@ -36,23 +72,32 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI=
|
||||
github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
|
||||
github.com/zalando/go-keyring v0.2.8/go.mod h1:tsMo+VpRq5NGyKfxoBVjCuMrG47yj8cmakZDO5QGii0=
|
||||
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
|
||||
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
|
||||
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
|
||||
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
|
||||
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
|
||||
@ -17,6 +17,7 @@ import (
|
||||
"github.com/openclaw/discrawl/internal/discord"
|
||||
"github.com/openclaw/discrawl/internal/discorddesktop"
|
||||
"github.com/openclaw/discrawl/internal/embed"
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
)
|
||||
@ -314,16 +315,37 @@ func (r *runtime) runWiretap(args []string) error {
|
||||
}
|
||||
|
||||
func (r *runtime) runStatus(args []string) error {
|
||||
if len(args) != 0 {
|
||||
fs := flag.NewFlagSet("status", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("status takes no arguments"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
dbPath, err := config.ExpandPath(r.cfg.DBPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
|
||||
if err != nil {
|
||||
return err
|
||||
status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()}
|
||||
if r.store != nil {
|
||||
status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.json {
|
||||
needsUpdate := false
|
||||
if r.store != nil && r.cfg.ShareEnabled() {
|
||||
if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil {
|
||||
needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter)
|
||||
}
|
||||
}
|
||||
return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate))
|
||||
}
|
||||
return r.print(status)
|
||||
}
|
||||
@ -384,9 +406,18 @@ func (r *runtime) runEmbed(args []string) error {
|
||||
}
|
||||
|
||||
func (r *runtime) runDoctor(args []string) error {
|
||||
if len(args) != 0 {
|
||||
fs := flag.NewFlagSet("doctor", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("doctor takes no arguments"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
report := map[string]any{
|
||||
"config_path": r.configPath,
|
||||
}
|
||||
|
||||
@ -47,6 +47,10 @@ func ExitCode(err error) int {
|
||||
}
|
||||
|
||||
func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
|
||||
printUsage(stdout)
|
||||
return nil
|
||||
}
|
||||
global := flag.NewFlagSet("discrawl", flag.ContinueOnError)
|
||||
global.SetOutput(io.Discard)
|
||||
configPath := global.String("config", "", "")
|
||||
@ -66,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
return nil
|
||||
}
|
||||
rest := global.Args()
|
||||
if len(rest) == 0 || rest[0] == "help" {
|
||||
if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
|
||||
printUsage(stdout)
|
||||
return nil
|
||||
}
|
||||
if rest[0] == "version" {
|
||||
_, _ = io.WriteString(stdout, version+"\n")
|
||||
return nil
|
||||
}
|
||||
level := slog.LevelInfo
|
||||
if *quiet {
|
||||
level = slog.LevelError
|
||||
@ -129,6 +137,8 @@ type attachmentTextConfigurer interface {
|
||||
|
||||
func (r *runtime) dispatch(rest []string) error {
|
||||
switch rest[0] {
|
||||
case "metadata":
|
||||
return r.runMetadata(rest[1:])
|
||||
case "init":
|
||||
return r.runInit(rest[1:])
|
||||
case "sync":
|
||||
@ -141,9 +151,16 @@ func (r *runtime) dispatch(rest []string) error {
|
||||
return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) })
|
||||
case "wiretap":
|
||||
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
case "tap", "cache-import":
|
||||
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
case "search":
|
||||
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
|
||||
return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
|
||||
case "tui":
|
||||
if hasHelpArg(rest[1:]) {
|
||||
return r.runTUI(rest[1:])
|
||||
}
|
||||
return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) })
|
||||
case "messages":
|
||||
if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") {
|
||||
return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) })
|
||||
@ -167,7 +184,7 @@ func (r *runtime) dispatch(rest []string) error {
|
||||
case "channels":
|
||||
return r.withLocalStoreLocked(true, func() error { return r.runChannels(rest[1:]) })
|
||||
case "status":
|
||||
return r.withLocalStoreLocked(true, func() error { return r.runStatus(rest[1:]) })
|
||||
return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) })
|
||||
case "report":
|
||||
return r.withLocalStoreLocked(true, func() error { return r.runReport(rest[1:]) })
|
||||
case "publish":
|
||||
@ -249,6 +266,35 @@ func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn f
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreReadOnly(fn func() error) error {
|
||||
cfg, err := config.Load(r.configPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return configErr(err)
|
||||
}
|
||||
cfg = config.Default()
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
}
|
||||
dbPath, err := config.ExpandPath(cfg.DBPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
var openErr error
|
||||
r.store, openErr = store.OpenReadOnly(r.ctx, dbPath)
|
||||
if openErr != nil {
|
||||
if errors.Is(openErr, os.ErrNotExist) {
|
||||
r.store = nil
|
||||
return fn()
|
||||
}
|
||||
return dbErr(openErr)
|
||||
}
|
||||
defer func() { _ = r.store.Close() }()
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error {
|
||||
return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn)
|
||||
}
|
||||
|
||||
@ -76,6 +76,21 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
NormalizedContent: "panic locked database",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: "g2", Name: "Other Guild", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "random", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
|
||||
ID: "m-other",
|
||||
GuildID: "g2",
|
||||
ChannelID: "c2",
|
||||
ChannelName: "random",
|
||||
AuthorID: "u2",
|
||||
AuthorName: "Outside",
|
||||
MessageType: 0,
|
||||
CreatedAt: time.Now().UTC().Add(-time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "outside default guild",
|
||||
NormalizedContent: "outside default guild",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
|
||||
ID: "m2",
|
||||
GuildID: "g1",
|
||||
@ -137,6 +152,35 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
|
||||
require.NotEmpty(t, out.String())
|
||||
}
|
||||
|
||||
before, err := os.ReadFile(dbPath)
|
||||
require.NoError(t, err)
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "tui", "--limit", "5"}, &out, &bytes.Buffer{}))
|
||||
var rows []map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &rows))
|
||||
require.NotEmpty(t, rows)
|
||||
require.Equal(t, "panic locked database", rows[0]["title"])
|
||||
require.Equal(t, "discord", rows[0]["source"])
|
||||
require.Equal(t, "message", rows[0]["kind"])
|
||||
require.Equal(t, "Guild", rows[0]["scope"])
|
||||
require.Equal(t, "general", rows[0]["container"])
|
||||
require.Equal(t, "https://discord.com/channels/g1/c1/m1", rows[0]["url"])
|
||||
after, err := os.ReadFile(dbPath)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, before, after, "tui --json should not mutate the database")
|
||||
}
|
||||
|
||||
func TestTUIHelpReturnsUsage(t *testing.T) {
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
|
||||
require.NoError(t, Run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr))
|
||||
require.Contains(t, stdout.String(), "Usage of tui:")
|
||||
require.Contains(t, stdout.String(), "-limit")
|
||||
require.Contains(t, stdout.String(), "right-click")
|
||||
require.Contains(t, stdout.String(), "# jump")
|
||||
require.Empty(t, stderr.String())
|
||||
}
|
||||
|
||||
func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
|
||||
@ -183,6 +227,53 @@ func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
}
|
||||
|
||||
func TestDiscordTUIRowsIncludePaneMetadata(t *testing.T) {
|
||||
rows := discordTUIRows([]store.MessageRow{{
|
||||
MessageID: "m1",
|
||||
GuildID: "@me",
|
||||
GuildName: "Discord Direct Messages",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "Vincent K",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Peter",
|
||||
Content: "hello from desktop",
|
||||
DisplayContent: "hello from Vincent",
|
||||
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
|
||||
ReplyToMessage: "m0",
|
||||
HasAttachments: true,
|
||||
AttachmentNames: "trace.txt",
|
||||
AttachmentText: "stack trace line one",
|
||||
Pinned: true,
|
||||
}})
|
||||
require.Len(t, rows, 1)
|
||||
require.Equal(t, "hello from Vincent", rows[0].Title)
|
||||
require.Contains(t, rows[0].Detail, "hello from Vincent")
|
||||
require.Contains(t, rows[0].Detail, "Attachments")
|
||||
require.Contains(t, rows[0].Detail, "stack trace line one")
|
||||
require.Equal(t, "hello from Vincent", rows[0].Text)
|
||||
require.Equal(t, "Direct messages", rows[0].Scope)
|
||||
require.Equal(t, "Vincent K", rows[0].Container)
|
||||
require.Contains(t, rows[0].Tags, "dm")
|
||||
require.Equal(t, "true", rows[0].Fields["attachments"])
|
||||
require.Equal(t, "trace.txt", rows[0].Fields["attachment_names"])
|
||||
require.Equal(t, "true", rows[0].Fields["pinned"])
|
||||
require.Equal(t, "m0", rows[0].Fields["reply_to"])
|
||||
require.Equal(t, "@me", rows[0].Fields["guild_id"])
|
||||
|
||||
rows = discordTUIRows([]store.MessageRow{{
|
||||
MessageID: "m2",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c2",
|
||||
AuthorID: "439223656200273932",
|
||||
Content: "desktop-only author",
|
||||
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
|
||||
Source: "discord_desktop",
|
||||
}})
|
||||
require.Equal(t, "user:439223...3932", rows[0].Author)
|
||||
require.Equal(t, "DM c2", discordContainerLabel(store.MessageRow{GuildID: "@me", ChannelID: "c2"}))
|
||||
require.Contains(t, rows[0].Tags, "discord_desktop")
|
||||
}
|
||||
|
||||
func TestParseMessageWindow(t *testing.T) {
|
||||
rt := &runtime{now: func() time.Time {
|
||||
return time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
|
||||
|
||||
96
internal/cli/control_commands.go
Normal file
96
internal/cli/control_commands.go
Normal file
@ -0,0 +1,96 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/vincentkoc/crawlkit/control"
|
||||
)
|
||||
|
||||
func (r *runtime) runMetadata(args []string) error {
|
||||
fs := flag.NewFlagSet("metadata", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("metadata takes flags only"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
cfg := config.Default()
|
||||
manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl")
|
||||
manifest.Description = "Local-first Discord archive crawler."
|
||||
manifest.Branding = control.Branding{SymbolName: "bubble.left.and.bubble.right.fill", AccentColor: "#5865f2", BundleIdentifier: "com.hnc.Discord"}
|
||||
manifest.Paths = control.Paths{
|
||||
DefaultConfig: config.ResolvePath(""),
|
||||
ConfigEnv: config.DefaultConfigEnv,
|
||||
DefaultDatabase: cfg.DBPath,
|
||||
DefaultCache: cfg.CacheDir,
|
||||
DefaultLogs: cfg.LogDir,
|
||||
DefaultShare: cfg.Share.RepoPath,
|
||||
}
|
||||
manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"}
|
||||
manifest.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"discord", "desktop-cache", "sqlite", "git-share"}}
|
||||
manifest.Commands = map[string]control.Command{
|
||||
"status": {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true},
|
||||
"doctor": {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true},
|
||||
"sync": {Title: "Sync", Argv: []string{"discrawl", "--json", "sync"}, JSON: true, Mutates: true},
|
||||
"tap": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "tap"}, JSON: true, Mutates: true},
|
||||
"cache-import": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "cache-import"}, JSON: true, Mutates: true},
|
||||
"wiretap": {Title: "Legacy desktop cache import", Argv: []string{"discrawl", "--json", "wiretap"}, JSON: true, Mutates: true, Legacy: true, Deprecated: true},
|
||||
"tui": {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}},
|
||||
"tui-json": {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true},
|
||||
"publish": {Title: "Publish share", Argv: []string{"discrawl", "--json", "publish"}, JSON: true, Mutates: true},
|
||||
"subscribe": {Title: "Subscribe share", Argv: []string{"discrawl", "--json", "subscribe"}, JSON: true, Mutates: true},
|
||||
"update": {Title: "Update share", Argv: []string{"discrawl", "--json", "update"}, JSON: true, Mutates: true},
|
||||
}
|
||||
return r.print(manifest)
|
||||
}
|
||||
|
||||
func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status {
|
||||
counts := []control.Count{
|
||||
control.NewCount("guilds", "Guilds", int64(status.GuildCount)),
|
||||
control.NewCount("channels", "Channels", int64(status.ChannelCount)),
|
||||
control.NewCount("threads", "Threads", int64(status.ThreadCount)),
|
||||
control.NewCount("messages", "Messages", int64(status.MessageCount)),
|
||||
control.NewCount("members", "Members", int64(status.MemberCount)),
|
||||
control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)),
|
||||
}
|
||||
out := control.NewStatus("discrawl", fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount))
|
||||
out.State = "current"
|
||||
out.ConfigPath = configPath
|
||||
out.DatabasePath = status.DBPath
|
||||
out.Counts = counts
|
||||
if !status.LastSyncAt.IsZero() {
|
||||
out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339)
|
||||
}
|
||||
db := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, counts)
|
||||
out.DatabaseBytes = db.Bytes
|
||||
out.WALBytes = fileSize(status.DBPath + "-wal")
|
||||
out.Databases = []control.Database{db}
|
||||
out.Share = &control.Share{
|
||||
Enabled: cfg.ShareEnabled(),
|
||||
RepoPath: cfg.Share.RepoPath,
|
||||
Remote: cfg.Share.Remote,
|
||||
Branch: cfg.Share.Branch,
|
||||
NeedsUpdate: shareNeedsUpdate,
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// fileSize reports the size in bytes of the file at path, or 0 when the path
// cannot be stat'ed.
func fileSize(path string) int64 {
	if stat, statErr := os.Stat(path); statErr == nil {
		return stat.Size()
	}
	return 0
}
|
||||
@ -100,11 +100,16 @@ Usage:
|
||||
discrawl [global flags] <command> [args]
|
||||
|
||||
Commands:
|
||||
metadata
|
||||
version
|
||||
init
|
||||
sync
|
||||
tail
|
||||
tap
|
||||
cache-import
|
||||
wiretap
|
||||
search
|
||||
tui
|
||||
messages
|
||||
digest
|
||||
analytics
|
||||
|
||||
@ -96,3 +96,12 @@ func hasBoolFlag(args []string, name string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// hasHelpArg reports whether any element of args is exactly "help", "--help"
// or "-h".
func hasHelpArg(args []string) bool {
	for i := range args {
		switch args[i] {
		case "help", "--help", "-h":
			return true
		}
	}
	return false
}
|
||||
|
||||
239
internal/cli/tui_commands.go
Normal file
239
internal/cli/tui_commands.go
Normal file
@ -0,0 +1,239 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/vincentkoc/crawlkit/tui"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) runTUI(args []string) error {
|
||||
fs := flag.NewFlagSet("tui", flag.ContinueOnError)
|
||||
fs.SetOutput(r.stderr)
|
||||
fs.Usage = func() {
|
||||
_, _ = fmt.Fprintln(fs.Output(), "Usage of tui:")
|
||||
fs.PrintDefaults()
|
||||
_, _ = fmt.Fprintln(fs.Output())
|
||||
_, _ = fmt.Fprintln(fs.Output(), tui.ControlsHelp())
|
||||
}
|
||||
if hasHelpArg(args) {
|
||||
fs.SetOutput(r.stdout)
|
||||
}
|
||||
channel := fs.String("channel", "", "channel id")
|
||||
author := fs.String("author", "", "author/user id")
|
||||
limit := fs.Int("limit", 200, "row limit")
|
||||
includeEmpty := fs.Bool("include-empty", false, "include empty messages")
|
||||
dm := fs.Bool("dm", false, "browse direct messages")
|
||||
guildsFlag := fs.String("guilds", "", "comma-separated guild ids")
|
||||
guildFlag := fs.String("guild", "", "guild id")
|
||||
jsonOut := fs.Bool("json", false, "write browser rows as JSON")
|
||||
if len(args) == 1 && args[0] == "help" {
|
||||
fs.Usage()
|
||||
return nil
|
||||
}
|
||||
if err := fs.Parse(args); err != nil {
|
||||
if errors.Is(err, flag.ErrHelp) {
|
||||
return nil
|
||||
}
|
||||
return usageErr(err)
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("tui takes flags only"))
|
||||
}
|
||||
if *limit <= 0 {
|
||||
return usageErr(errors.New("tui --limit must be positive"))
|
||||
}
|
||||
guildIDs, err := r.resolveTUIGuilds(*dm, *guildFlag, *guildsFlag)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if r.store == nil {
|
||||
return tui.Browse(r.ctx, tui.BrowseOptions{
|
||||
AppName: "discrawl",
|
||||
Title: "discrawl archive",
|
||||
EmptyMessage: "discrawl has no local messages yet",
|
||||
JSON: r.json,
|
||||
Layout: tui.LayoutChat,
|
||||
SourceKind: r.archiveSourceKind(),
|
||||
SourceLocation: r.archiveSourceLocation(),
|
||||
Stdout: r.stdout,
|
||||
})
|
||||
}
|
||||
loadRows := func() ([]tui.Row, error) {
|
||||
rows, err := r.store.ListMessagesWithThreadContext(r.ctx, store.MessageListOptions{
|
||||
GuildIDs: guildIDs,
|
||||
Channel: *channel,
|
||||
Author: *author,
|
||||
Last: *limit,
|
||||
IncludeEmpty: *includeEmpty,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return discordTUIRows(rows), nil
|
||||
}
|
||||
archiveRows, err := loadRows()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return tui.Browse(r.ctx, tui.BrowseOptions{
|
||||
AppName: "discrawl",
|
||||
Title: "discrawl archive",
|
||||
EmptyMessage: "discrawl has no local messages yet",
|
||||
Rows: archiveRows,
|
||||
Refresh: func(context.Context) ([]tui.Row, error) { return loadRows() },
|
||||
JSON: r.json,
|
||||
Layout: tui.LayoutChat,
|
||||
SourceKind: r.archiveSourceKind(),
|
||||
SourceLocation: r.archiveSourceLocation(),
|
||||
Stdout: r.stdout,
|
||||
})
|
||||
}
|
||||
|
||||
func (r *runtime) resolveTUIGuilds(dm bool, guild, guilds string) ([]string, error) {
|
||||
guildIDs, err := directMessageGuildScope(dm, guild, guilds)
|
||||
if err != nil || dm || len(guildIDs) > 0 {
|
||||
return guildIDs, err
|
||||
}
|
||||
if defaultGuild := r.cfg.EffectiveDefaultGuildID(); defaultGuild != "" {
|
||||
return []string{defaultGuild}, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *runtime) archiveSourceKind() string {
|
||||
if strings.TrimSpace(r.cfg.Share.Remote) != "" {
|
||||
return tui.SourceRemote
|
||||
}
|
||||
return tui.SourceLocal
|
||||
}
|
||||
|
||||
func (r *runtime) archiveSourceLocation() string {
|
||||
if strings.TrimSpace(r.cfg.Share.Remote) != "" {
|
||||
return r.cfg.Share.Remote
|
||||
}
|
||||
return r.cfg.DBPath
|
||||
}
|
||||
|
||||
func discordTUIRows(rows []store.MessageRow) []tui.Row {
|
||||
items := make([]tui.Row, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
content := discordDisplayContent(row)
|
||||
title := strings.TrimSpace(content)
|
||||
detail := discordDetailContent(row, content)
|
||||
if title == "" {
|
||||
title = firstNonEmpty(strings.TrimSpace(row.AttachmentText), row.MessageID)
|
||||
}
|
||||
tags := []string{row.GuildID, row.ChannelID}
|
||||
if row.GuildID == "@me" {
|
||||
tags = append(tags, "dm")
|
||||
}
|
||||
if row.Source != "" {
|
||||
tags = append(tags, row.Source)
|
||||
}
|
||||
items = append(items, tui.Row{
|
||||
Source: "discord",
|
||||
Kind: "message",
|
||||
ID: row.MessageID,
|
||||
ParentID: row.ReplyToMessage,
|
||||
Scope: discordScopeLabel(row),
|
||||
Container: discordContainerLabel(row),
|
||||
Author: discordAuthorLabel(row),
|
||||
Title: title,
|
||||
Text: content,
|
||||
Detail: detail,
|
||||
URL: discordMessageURL(row),
|
||||
CreatedAt: formatTime(row.CreatedAt),
|
||||
Tags: tags,
|
||||
Fields: map[string]string{
|
||||
"attachment_names": row.AttachmentNames,
|
||||
"attachments": boolString(row.HasAttachments),
|
||||
"author_id": row.AuthorID,
|
||||
"channel_id": row.ChannelID,
|
||||
"guild_id": row.GuildID,
|
||||
"pinned": boolString(row.Pinned),
|
||||
"reply_to": row.ReplyToMessage,
|
||||
"source": row.Source,
|
||||
},
|
||||
})
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
func discordDetailContent(row store.MessageRow, content string) string {
|
||||
var parts []string
|
||||
if strings.TrimSpace(content) != "" {
|
||||
parts = append(parts, strings.TrimSpace(content))
|
||||
}
|
||||
if strings.TrimSpace(row.AttachmentText) != "" {
|
||||
parts = append(parts, "Attachments\n"+strings.TrimSpace(row.AttachmentText))
|
||||
}
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
|
||||
func discordDisplayContent(row store.MessageRow) string {
|
||||
if content := strings.TrimSpace(row.DisplayContent); content != "" {
|
||||
return content
|
||||
}
|
||||
return row.Content
|
||||
}
|
||||
|
||||
func discordMessageURL(row store.MessageRow) string {
|
||||
guildID := strings.TrimSpace(row.GuildID)
|
||||
channelID := strings.TrimSpace(row.ChannelID)
|
||||
messageID := strings.TrimSpace(row.MessageID)
|
||||
if guildID == "" || channelID == "" || messageID == "" {
|
||||
return ""
|
||||
}
|
||||
return "https://discord.com/channels/" + guildID + "/" + channelID + "/" + messageID
|
||||
}
|
||||
|
||||
func discordScopeLabel(row store.MessageRow) string {
|
||||
if row.GuildID == "@me" {
|
||||
return "Direct messages"
|
||||
}
|
||||
return firstNonEmpty(row.GuildName, row.GuildID)
|
||||
}
|
||||
|
||||
func discordContainerLabel(row store.MessageRow) string {
|
||||
if row.GuildID == "@me" {
|
||||
return firstNonEmpty(row.ChannelName, "DM "+compactDiscordID(row.ChannelID))
|
||||
}
|
||||
return firstNonEmpty(row.ChannelName, row.ChannelID)
|
||||
}
|
||||
|
||||
func discordAuthorLabel(row store.MessageRow) string {
|
||||
if name := strings.TrimSpace(row.AuthorName); name != "" {
|
||||
return name
|
||||
}
|
||||
if id := strings.TrimSpace(row.AuthorID); id != "" {
|
||||
return "user:" + compactDiscordID(id)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// compactDiscordID trims surrounding whitespace from id and, when the result
// is longer than 10 bytes, abbreviates it to its first 6 and last 4 bytes
// joined by "...". Shorter ids are returned trimmed but otherwise unchanged.
func compactDiscordID(id string) string {
	trimmed := strings.TrimSpace(id)
	if n := len(trimmed); n > 10 {
		return trimmed[:6] + "..." + trimmed[n-4:]
	}
	return trimmed
}
|
||||
|
||||
// boolString renders true as "true" and false as the empty string.
func boolString(value bool) string {
	if !value {
		return ""
	}
	return "true"
}
|
||||
@ -9,7 +9,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
crawlconfig "github.com/vincentkoc/crawlkit/config"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -85,14 +85,25 @@ type TokenResolution struct {
|
||||
Path string
|
||||
}
|
||||
|
||||
var appConfig = crawlconfig.App{Name: "discrawl", ConfigEnv: DefaultConfigEnv, BaseDir: "~/.discrawl", LegacyBaseDir: "~/.discrawl"}
|
||||
|
||||
func Default() Config {
|
||||
home, _ := os.UserHomeDir()
|
||||
base := filepath.Join(home, ".discrawl")
|
||||
paths, err := appConfig.DefaultPaths()
|
||||
if err != nil {
|
||||
base := filepath.Join(home, ".discrawl")
|
||||
paths = crawlconfig.Paths{
|
||||
DBPath: filepath.Join(base, "discrawl.db"),
|
||||
CacheDir: filepath.Join(base, "cache"),
|
||||
LogDir: filepath.Join(base, "logs"),
|
||||
ShareDir: filepath.Join(base, "share"),
|
||||
}
|
||||
}
|
||||
return Config{
|
||||
Version: 1,
|
||||
DBPath: filepath.Join(base, "discrawl.db"),
|
||||
CacheDir: filepath.Join(base, "cache"),
|
||||
LogDir: filepath.Join(base, "logs"),
|
||||
DBPath: paths.DBPath,
|
||||
CacheDir: paths.CacheDir,
|
||||
LogDir: paths.LogDir,
|
||||
DefaultGuildID: "",
|
||||
Discord: DiscordConfig{
|
||||
TokenSource: "env",
|
||||
@ -124,7 +135,7 @@ func Default() Config {
|
||||
},
|
||||
},
|
||||
Share: ShareConfig{
|
||||
RepoPath: filepath.Join(base, "share"),
|
||||
RepoPath: paths.ShareDir,
|
||||
Branch: "main",
|
||||
AutoUpdate: true,
|
||||
StaleAfter: "15m",
|
||||
@ -145,14 +156,12 @@ func defaultSyncConcurrency() int {
|
||||
}
|
||||
|
||||
func ResolvePath(flagPath string) string {
|
||||
if strings.TrimSpace(flagPath) != "" {
|
||||
return flagPath
|
||||
path, err := appConfig.ResolveConfigPath(flagPath)
|
||||
if err != nil {
|
||||
home, _ := os.UserHomeDir()
|
||||
return filepath.Join(home, ".discrawl", "config.toml")
|
||||
}
|
||||
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
|
||||
return envPath
|
||||
}
|
||||
home, _ := os.UserHomeDir()
|
||||
return filepath.Join(home, ".discrawl", "config.toml")
|
||||
return path
|
||||
}
|
||||
|
||||
func Load(path string) (Config, error) {
|
||||
@ -161,13 +170,9 @@ func Load(path string) (Config, error) {
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
data, err := os.ReadFile(expanded)
|
||||
if err != nil {
|
||||
if err := crawlconfig.LoadTOML(expanded, &cfg); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if err := toml.Unmarshal(data, &cfg); err != nil {
|
||||
return Config{}, fmt.Errorf("parse config: %w", err)
|
||||
}
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
@ -182,14 +187,7 @@ func Write(path string, cfg Config) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(expanded), 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir config dir: %w", err)
|
||||
}
|
||||
data, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal config: %w", err)
|
||||
}
|
||||
return os.WriteFile(expanded, data, 0o600)
|
||||
return crawlconfig.WriteTOML(expanded, cfg, 0o600)
|
||||
}
|
||||
|
||||
func (c *Config) Normalize() error {
|
||||
@ -343,35 +341,18 @@ func (c Config) ShareEnabled() bool {
|
||||
}
|
||||
|
||||
func EnsureRuntimeDirs(cfg Config) error {
|
||||
paths := []string{cfg.CacheDir, cfg.LogDir, filepath.Dir(cfg.DBPath)}
|
||||
for _, path := range paths {
|
||||
expanded, err := ExpandPath(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(expanded, 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir %s: %w", expanded, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
|
||||
DBPath: cfg.DBPath,
|
||||
CacheDir: cfg.CacheDir,
|
||||
LogDir: cfg.LogDir,
|
||||
})
|
||||
}
|
||||
|
||||
func ExpandPath(path string) (string, error) {
|
||||
if strings.TrimSpace(path) == "" {
|
||||
return "", errors.New("empty path")
|
||||
}
|
||||
if strings.HasPrefix(path, "~/") || path == "~" {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("home dir: %w", err)
|
||||
}
|
||||
if path == "~" {
|
||||
path = home
|
||||
} else {
|
||||
path = filepath.Join(home, strings.TrimPrefix(path, "~/"))
|
||||
}
|
||||
}
|
||||
return filepath.Clean(os.ExpandEnv(path)), nil
|
||||
return filepath.Clean(os.ExpandEnv(crawlconfig.ExpandHome(path))), nil
|
||||
}
|
||||
|
||||
func uniqueStrings(in []string) []string {
|
||||
|
||||
@ -12,12 +12,13 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/vincentkoc/crawlkit/mirror"
|
||||
"github.com/vincentkoc/crawlkit/snapshot"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -27,7 +28,7 @@ const (
|
||||
directMessageGuildID = "@me"
|
||||
)
|
||||
|
||||
var ErrNoManifest = errors.New("share manifest not found")
|
||||
var ErrNoManifest = snapshot.ErrNoManifest
|
||||
|
||||
const shardFlushRows = 1024
|
||||
|
||||
@ -73,13 +74,7 @@ type Manifest struct {
|
||||
Files map[string]string `json:"files,omitempty"`
|
||||
}
|
||||
|
||||
type TableManifest struct {
|
||||
Name string `json:"name"`
|
||||
File string `json:"file,omitempty"`
|
||||
Files []string `json:"files,omitempty"`
|
||||
Columns []string `json:"columns"`
|
||||
Rows int `json:"rows"`
|
||||
}
|
||||
type TableManifest = snapshot.TableManifest
|
||||
|
||||
type EmbeddingManifest struct {
|
||||
Provider string `json:"provider"`
|
||||
@ -94,120 +89,52 @@ func EnsureRepo(ctx context.Context, opts Options) error {
|
||||
if strings.TrimSpace(opts.RepoPath) == "" {
|
||||
return errors.New("share repo path is empty")
|
||||
}
|
||||
if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil {
|
||||
return nil
|
||||
}
|
||||
if strings.TrimSpace(opts.Remote) != "" {
|
||||
if err := os.MkdirAll(filepath.Dir(opts.RepoPath), 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir share parent: %w", err)
|
||||
}
|
||||
if err := run(ctx, "", "git", "clone", opts.Remote, opts.RepoPath); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(opts.Branch) != "" {
|
||||
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(opts.RepoPath, 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir share repo: %w", err)
|
||||
}
|
||||
if err := run(ctx, opts.RepoPath, "git", "init"); err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(opts.Branch) != "" {
|
||||
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return mirror.EnsureRepo(ctx, mirrorOptions(opts))
|
||||
}
|
||||
|
||||
func Pull(ctx context.Context, opts Options) error {
|
||||
if strings.TrimSpace(opts.Remote) == "" {
|
||||
if strings.TrimSpace(opts.Remote) == "" && strings.TrimSpace(opts.RepoPath) == "" {
|
||||
return nil
|
||||
}
|
||||
if err := EnsureRepo(ctx, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := run(ctx, opts.RepoPath, "git", "fetch", "--prune", "origin"); err != nil {
|
||||
return err
|
||||
}
|
||||
branch := opts.Branch
|
||||
if strings.TrimSpace(branch) == "" {
|
||||
branch = "main"
|
||||
}
|
||||
remoteRef := "refs/remotes/origin/" + branch
|
||||
if _, err := output(ctx, opts.RepoPath, "git", "rev-parse", "--verify", remoteRef); err != nil {
|
||||
return run(ctx, opts.RepoPath, "git", "checkout", "-B", branch)
|
||||
}
|
||||
if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", branch, "origin/"+branch); err != nil {
|
||||
return err
|
||||
}
|
||||
return run(ctx, opts.RepoPath, "git", "pull", "--ff-only", "origin", branch)
|
||||
return mirror.Pull(ctx, mirrorOptions(opts))
|
||||
}
|
||||
|
||||
func Commit(ctx context.Context, opts Options, message string) (bool, error) {
|
||||
if err := run(ctx, opts.RepoPath, "git", "add", "."); err != nil {
|
||||
return false, err
|
||||
}
|
||||
out, err := output(ctx, opts.RepoPath, "git", "status", "--porcelain")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if strings.TrimSpace(out) == "" {
|
||||
return false, nil
|
||||
}
|
||||
if strings.TrimSpace(message) == "" {
|
||||
message = "sync: discord archive"
|
||||
}
|
||||
if err := run(ctx, opts.RepoPath, "git", "commit", "-m", message); err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
return mirror.Commit(ctx, mirrorOptions(opts), message)
|
||||
}
|
||||
|
||||
func Push(ctx context.Context, opts Options) error {
|
||||
branch := opts.Branch
|
||||
if strings.TrimSpace(branch) == "" {
|
||||
branch = "main"
|
||||
if err := mirror.Push(ctx, mirrorOptions(opts)); err != nil {
|
||||
branch := opts.Branch
|
||||
if strings.TrimSpace(branch) == "" {
|
||||
branch = "main"
|
||||
}
|
||||
return fmt.Errorf("git push -u origin %s: %w", branch, err)
|
||||
}
|
||||
out, err := output(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch)
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if !isNonFastForwardPush(out) {
|
||||
return fmt.Errorf("git push -u origin %s: %w\n%s", branch, err, strings.TrimSpace(out))
|
||||
}
|
||||
if pullErr := run(ctx, opts.RepoPath, "git", "pull", "--rebase", "--autostash", "origin", branch); pullErr != nil {
|
||||
return fmt.Errorf("rebase before push retry: %w", pullErr)
|
||||
}
|
||||
return run(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch)
|
||||
return nil
|
||||
}
|
||||
|
||||
func Export(ctx context.Context, s *store.Store, opts Options) (Manifest, error) {
|
||||
if err := EnsureRepo(ctx, opts); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
if err := os.RemoveAll(filepath.Join(opts.RepoPath, "tables")); err != nil {
|
||||
return Manifest{}, fmt.Errorf("reset tables dir: %w", err)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Join(opts.RepoPath, "tables"), 0o755); err != nil {
|
||||
return Manifest{}, fmt.Errorf("mkdir tables dir: %w", err)
|
||||
base, err := snapshot.Export(ctx, snapshot.ExportOptions{
|
||||
DB: s.DB(),
|
||||
RootDir: opts.RepoPath,
|
||||
Tables: SnapshotTables,
|
||||
MaxShardBytes: maxShardBytes,
|
||||
Filter: func(table string, row map[string]any) (bool, error) {
|
||||
return !isDirectMessageSnapshotRow(table, row), nil
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
manifest := Manifest{
|
||||
Version: 1,
|
||||
GeneratedAt: time.Now().UTC(),
|
||||
Files: map[string]string{"manifest": ManifestName},
|
||||
}
|
||||
for _, table := range SnapshotTables {
|
||||
entry, err := exportTable(ctx, s.DB(), opts.RepoPath, table)
|
||||
if err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
manifest.Tables = append(manifest.Tables, entry)
|
||||
Version: base.Version,
|
||||
GeneratedAt: base.GeneratedAt,
|
||||
Tables: base.Tables,
|
||||
Files: base.Files,
|
||||
}
|
||||
if opts.IncludeEmbeddings {
|
||||
entry, err := exportEmbeddings(ctx, s.DB(), opts)
|
||||
@ -243,53 +170,51 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error)
|
||||
_ = restorePragmas(ctx)
|
||||
}
|
||||
}()
|
||||
tx, err := s.DB().BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
if _, err := snapshot.Import(ctx, snapshot.ImportOptions{
|
||||
DB: s.DB(),
|
||||
RootDir: opts.RepoPath,
|
||||
DeleteTables: SnapshotTables,
|
||||
Progress: func(progress snapshot.ImportProgress) {
|
||||
opts.reportProgress(ImportProgress{
|
||||
Phase: progress.Phase,
|
||||
Table: progress.Table,
|
||||
File: progress.File,
|
||||
FileIndex: progress.FileIndex,
|
||||
FileCount: progress.FileCount,
|
||||
Rows: progress.Rows,
|
||||
TotalRows: progress.TotalRows,
|
||||
})
|
||||
},
|
||||
Filter: func(table string, row map[string]any) (bool, error) {
|
||||
return !isDirectMessageSnapshotRow(table, row), nil
|
||||
},
|
||||
BeforeImport: func(ctx context.Context, tx *sql.Tx) error {
|
||||
for _, table := range []string{"message_fts", "member_fts"} {
|
||||
if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil {
|
||||
return fmt.Errorf("drop %s: %w", table, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
},
|
||||
DeleteTable: func(ctx context.Context, tx *sql.Tx, table string) error {
|
||||
query, args := snapshotDeleteQuery(table)
|
||||
if _, err := tx.ExecContext(ctx, query, args...); err != nil {
|
||||
return fmt.Errorf("clear %s: %w", table, err)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
AfterImport: func(ctx context.Context, tx *sql.Tx) error {
|
||||
if err := repairImportedGuildIDs(ctx, tx); err != nil {
|
||||
return err
|
||||
}
|
||||
if opts.IncludeEmbeddings {
|
||||
return importEmbeddings(ctx, tx, opts, manifest.Embeddings)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
committed := false
|
||||
defer func() {
|
||||
if !committed {
|
||||
_ = tx.Rollback()
|
||||
}
|
||||
}()
|
||||
for _, table := range []string{"message_fts", "member_fts"} {
|
||||
opts.reportProgress(ImportProgress{Phase: "drop_fts", Table: table})
|
||||
if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil {
|
||||
return Manifest{}, fmt.Errorf("drop %s: %w", table, err)
|
||||
}
|
||||
}
|
||||
for _, table := range slices.Backward(SnapshotTables) {
|
||||
opts.reportProgress(ImportProgress{Phase: "clear", Table: table})
|
||||
query, args := snapshotDeleteQuery(table)
|
||||
if _, err := tx.ExecContext(ctx, query, args...); err != nil {
|
||||
return Manifest{}, fmt.Errorf("clear %s: %w", table, err)
|
||||
}
|
||||
}
|
||||
for _, table := range manifest.Tables {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
opts.reportProgress(ImportProgress{Phase: "table_start", Table: table.Name, TotalRows: table.Rows})
|
||||
if err := importTable(ctx, tx, opts, table); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
opts.reportProgress(ImportProgress{Phase: "table_done", Table: table.Name, TotalRows: table.Rows})
|
||||
}
|
||||
opts.reportProgress(ImportProgress{Phase: "repair"})
|
||||
if err := repairImportedGuildIDs(ctx, tx); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
if opts.IncludeEmbeddings {
|
||||
if err := importEmbeddings(ctx, tx, opts, manifest.Embeddings); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
}
|
||||
opts.reportProgress(ImportProgress{Phase: "commit"})
|
||||
if err := tx.Commit(); err != nil {
|
||||
return Manifest{}, err
|
||||
}
|
||||
committed = true
|
||||
opts.reportProgress(ImportProgress{Phase: "rebuild_fts"})
|
||||
if err := s.RebuildSearchIndexes(ctx); err != nil {
|
||||
return Manifest{}, err
|
||||
@ -436,6 +361,10 @@ func ReadManifest(repoPath string) (Manifest, error) {
|
||||
return manifest, nil
|
||||
}
|
||||
|
||||
func mirrorOptions(opts Options) mirror.Options {
|
||||
return mirror.Options{RepoPath: opts.RepoPath, Remote: opts.Remote, Branch: opts.Branch}
|
||||
}
|
||||
|
||||
func NeedsImport(ctx context.Context, s *store.Store, staleAfter time.Duration) bool {
|
||||
if staleAfter <= 0 {
|
||||
staleAfter = 15 * time.Minute
|
||||
|
||||
@ -184,6 +184,26 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID)
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content")
|
||||
require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import")
|
||||
manifest = appendSnapshotRow(t, repo, manifest, "messages", map[string]any{
|
||||
"id": "hostile-dm",
|
||||
"guild_id": directMessageGuildID,
|
||||
"channel_id": "dm-c2",
|
||||
"author_id": "u9",
|
||||
"message_type": 0,
|
||||
"created_at": "2026-04-24T16:00:00Z",
|
||||
"content": "hostile imported dm",
|
||||
"normalized_content": "hostile imported dm",
|
||||
"pinned": 0,
|
||||
"has_attachments": 0,
|
||||
"raw_json": `{}`,
|
||||
"updated_at": "2026-04-24T16:00:00Z",
|
||||
})
|
||||
manifest = appendSnapshotRow(t, repo, manifest, "sync_state", map[string]any{
|
||||
"scope": "wiretap:hostile",
|
||||
"cursor": "private",
|
||||
"updated_at": "2026-04-24T16:00:00Z",
|
||||
})
|
||||
writeShareManifest(t, repo, manifest)
|
||||
|
||||
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
|
||||
require.NoError(t, err)
|
||||
@ -202,6 +222,12 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) {
|
||||
wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "2026-04-24T15:33:17Z", wiretapState)
|
||||
hostileResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "hostile imported dm", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hostileResults)
|
||||
_, rows, err := dst.ReadOnlyQuery(ctx, "select count(*) from sync_state where scope = 'wiretap:hostile'")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "0", rows[0][0])
|
||||
}
|
||||
|
||||
func TestExportImportEmbeddingsOptIn(t *testing.T) {
|
||||
@ -800,6 +826,33 @@ func writeGzipJSONLines(t *testing.T, path string, lines []string) {
|
||||
require.NoError(t, file.Close())
|
||||
}
|
||||
|
||||
func appendSnapshotRow(t *testing.T, repo string, manifest Manifest, tableName string, row map[string]any) Manifest {
|
||||
t.Helper()
|
||||
for i := range manifest.Tables {
|
||||
if manifest.Tables[i].Name != tableName {
|
||||
continue
|
||||
}
|
||||
rel := filepath.ToSlash(filepath.Join("tables", tableName, "hostile-"+strconv.Itoa(len(manifest.Tables[i].Files))+".jsonl.gz"))
|
||||
full := filepath.Join(repo, filepath.FromSlash(rel))
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755))
|
||||
body, err := json.Marshal(row)
|
||||
require.NoError(t, err)
|
||||
writeGzipJSONLines(t, full, []string{string(body)})
|
||||
manifest.Tables[i].Files = append(manifest.Tables[i].Files, rel)
|
||||
manifest.Tables[i].Rows++
|
||||
return manifest
|
||||
}
|
||||
t.Fatalf("table %s not found", tableName)
|
||||
return manifest
|
||||
}
|
||||
|
||||
func writeShareManifest(t *testing.T, repo string, manifest Manifest) {
|
||||
t.Helper()
|
||||
body, err := json.MarshalIndent(manifest, "", " ")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, os.WriteFile(filepath.Join(repo, ManifestName), append(body, '\n'), 0o600))
|
||||
}
|
||||
|
||||
func snapshotTableText(t *testing.T, repo string, table TableManifest) string {
|
||||
t.Helper()
|
||||
return snapshotFilesText(t, repo, table.Files)
|
||||
|
||||
@ -92,6 +92,8 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 1)
|
||||
require.Contains(t, messages[0].Content, "stack trace")
|
||||
require.Equal(t, "trace.txt", messages[0].AttachmentNames)
|
||||
require.Contains(t, messages[0].AttachmentText, "stack trace line one")
|
||||
|
||||
mentions, err := s.ListMentions(ctx, MentionListOptions{Target: "Shadow", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
@ -116,3 +118,72 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Len(t, filtered, 1)
|
||||
}
|
||||
|
||||
func TestListMessagesResolvesMentionNamesForDisplay(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := context.Background()
|
||||
s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMember(ctx, MemberRecord{
|
||||
GuildID: "g1",
|
||||
UserID: "u4",
|
||||
Username: "fallback",
|
||||
DisplayName: "Fallback User",
|
||||
RoleIDsJSON: `[]`,
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
|
||||
createdAt := time.Now().UTC().Format(time.RFC3339Nano)
|
||||
rawContent := "ping <@u2> <@!u3> <@&r1> in <#c1>"
|
||||
fallbackContent := "ask <@u4> in <#c1>"
|
||||
require.NoError(t, s.UpsertMessages(ctx, []MessageMutation{
|
||||
{
|
||||
Record: MessageRecord{
|
||||
ID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "maintainers",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Peter",
|
||||
MessageType: 0,
|
||||
CreatedAt: createdAt,
|
||||
Content: rawContent,
|
||||
NormalizedContent: rawContent,
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
Mentions: []MentionEventRecord{
|
||||
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u2", TargetName: "Shadow", EventAt: createdAt},
|
||||
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u3", TargetName: "Vincent", EventAt: createdAt},
|
||||
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "role", TargetID: "r1", TargetName: "Maintainers", EventAt: createdAt},
|
||||
{MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "channel", TargetID: "c1", TargetName: "maintainers", EventAt: createdAt},
|
||||
},
|
||||
},
|
||||
{
|
||||
Record: MessageRecord{
|
||||
ID: "m2",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "maintainers",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Peter",
|
||||
MessageType: 0,
|
||||
CreatedAt: createdAt,
|
||||
Content: fallbackContent,
|
||||
NormalizedContent: fallbackContent,
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
messages, err := s.ListMessages(ctx, MessageListOptions{Channel: "maintainers", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, messages, 2)
|
||||
require.Equal(t, rawContent, messages[0].Content)
|
||||
require.Equal(t, "ping @Shadow @Vincent @Maintainers in #maintainers", messages[0].DisplayContent)
|
||||
require.Equal(t, fallbackContent, messages[1].Content)
|
||||
require.Equal(t, "ask @Fallback User in #maintainers", messages[1].DisplayContent)
|
||||
}
|
||||
|
||||
@ -2,10 +2,16 @@ package store
|
||||
|
||||
import (
|
||||
"context"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Patterns for raw Discord mention tokens found in message content. Both are
// compiled once at package scope and capture the mentioned ID in group 1.
var (
	// discordUserMentionRE matches "<@ID>" and "<@!ID>", where ID is one or
	// more ASCII letters or digits.
	discordUserMentionRE = regexp.MustCompile(`<@!?([A-Za-z0-9]+)>`)
	// discordChannelMentionRE matches "<#ID>", where ID is one or more ASCII
	// letters or digits.
	discordChannelMentionRE = regexp.MustCompile(`<#([A-Za-z0-9]+)>`)
)
|
||||
|
||||
type MessageListOptions struct {
|
||||
GuildIDs []string
|
||||
Channel string
|
||||
@ -29,17 +35,22 @@ type MentionListOptions struct {
|
||||
}
|
||||
|
||||
type MessageRow struct {
|
||||
MessageID string `json:"message_id"`
|
||||
GuildID string `json:"guild_id"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
AuthorID string `json:"author_id"`
|
||||
AuthorName string `json:"author_name"`
|
||||
Content string `json:"content"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
ReplyToMessage string `json:"reply_to_message_id,omitempty"`
|
||||
HasAttachments bool `json:"has_attachments"`
|
||||
Pinned bool `json:"pinned"`
|
||||
MessageID string `json:"message_id"`
|
||||
GuildID string `json:"guild_id"`
|
||||
GuildName string `json:"guild_name,omitempty"`
|
||||
ChannelID string `json:"channel_id"`
|
||||
ChannelName string `json:"channel_name"`
|
||||
AuthorID string `json:"author_id"`
|
||||
AuthorName string `json:"author_name"`
|
||||
Content string `json:"content"`
|
||||
DisplayContent string `json:"display_content,omitempty"`
|
||||
CreatedAt time.Time `json:"created_at"`
|
||||
ReplyToMessage string `json:"reply_to_message_id,omitempty"`
|
||||
Source string `json:"source,omitempty"`
|
||||
HasAttachments bool `json:"has_attachments"`
|
||||
AttachmentNames string `json:"attachment_names,omitempty"`
|
||||
AttachmentText string `json:"attachment_text,omitempty"`
|
||||
Pinned bool `json:"pinned"`
|
||||
}
|
||||
|
||||
func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) {
|
||||
@ -75,6 +86,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
|
||||
select
|
||||
m.id,
|
||||
m.guild_id,
|
||||
coalesce(g.name, ''),
|
||||
m.channel_id,
|
||||
coalesce(c.name, ''),
|
||||
coalesce(m.author_id, ''),
|
||||
@ -93,9 +105,13 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
|
||||
end,
|
||||
m.created_at,
|
||||
coalesce(m.reply_to_message_id, ''),
|
||||
coalesce(json_extract(m.raw_json, '$.source'), ''),
|
||||
m.has_attachments,
|
||||
coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''),
|
||||
coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''),
|
||||
m.pinned
|
||||
from messages m
|
||||
left join guilds g on g.id = m.guild_id
|
||||
left join channels c on c.id = m.channel_id
|
||||
left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id
|
||||
where ` + strings.Join(clauses, " and ") + `
|
||||
@ -139,6 +155,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
|
||||
if err := rows.Scan(
|
||||
&row.MessageID,
|
||||
&row.GuildID,
|
||||
&row.GuildName,
|
||||
&row.ChannelID,
|
||||
&row.ChannelName,
|
||||
&row.AuthorID,
|
||||
@ -146,7 +163,10 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
|
||||
&row.Content,
|
||||
&created,
|
||||
&row.ReplyToMessage,
|
||||
&row.Source,
|
||||
&hasAttachments,
|
||||
&row.AttachmentNames,
|
||||
&row.AttachmentText,
|
||||
&pinned,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
@ -154,11 +174,364 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me
|
||||
row.CreatedAt = parseTime(created)
|
||||
row.HasAttachments = hasAttachments == 1
|
||||
row.Pinned = pinned == 1
|
||||
row.DisplayContent = row.Content
|
||||
out = append(out, row)
|
||||
}
|
||||
return out, rows.Err()
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, s.resolveMessageDisplayMentions(ctx, out)
|
||||
}
|
||||
|
||||
func (s *Store) ListMessagesWithThreadContext(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) {
|
||||
rows, err := s.ListMessages(ctx, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s.hydrateMessageThreadContext(ctx, rows, opts.Limit+opts.Last)
|
||||
}
|
||||
|
||||
func (s *Store) hydrateMessageThreadContext(ctx context.Context, rows []MessageRow, limit int) ([]MessageRow, error) {
|
||||
if len(rows) == 0 {
|
||||
return rows, nil
|
||||
}
|
||||
rootIDs := make([]any, 0, len(rows))
|
||||
seenRoots := map[string]struct{}{}
|
||||
visible := map[string]struct{}{}
|
||||
for _, row := range rows {
|
||||
if id := strings.TrimSpace(row.MessageID); id != "" {
|
||||
visible[id] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, row := range rows {
|
||||
rootID := strings.TrimSpace(row.ReplyToMessage)
|
||||
if rootID == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := visible[rootID]; ok {
|
||||
continue
|
||||
}
|
||||
if _, ok := seenRoots[rootID]; ok {
|
||||
continue
|
||||
}
|
||||
seenRoots[rootID] = struct{}{}
|
||||
rootIDs = append(rootIDs, rootID)
|
||||
}
|
||||
if len(rootIDs) == 0 {
|
||||
return rows, nil
|
||||
}
|
||||
query := `
|
||||
select
|
||||
m.id,
|
||||
m.guild_id,
|
||||
coalesce(g.name, ''),
|
||||
m.channel_id,
|
||||
coalesce(c.name, ''),
|
||||
coalesce(m.author_id, ''),
|
||||
coalesce(
|
||||
nullif(mem.display_name, ''),
|
||||
nullif(mem.nick, ''),
|
||||
nullif(mem.global_name, ''),
|
||||
nullif(mem.username, ''),
|
||||
nullif(json_extract(m.raw_json, '$.author.global_name'), ''),
|
||||
nullif(json_extract(m.raw_json, '$.author.username'), ''),
|
||||
''
|
||||
),
|
||||
case
|
||||
when trim(coalesce(m.content, '')) <> '' then m.content
|
||||
else m.normalized_content
|
||||
end,
|
||||
m.created_at,
|
||||
coalesce(m.reply_to_message_id, ''),
|
||||
coalesce(json_extract(m.raw_json, '$.source'), ''),
|
||||
m.has_attachments,
|
||||
coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''),
|
||||
coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''),
|
||||
m.pinned
|
||||
from messages m
|
||||
left join guilds g on g.id = m.guild_id
|
||||
left join channels c on c.id = m.channel_id
|
||||
left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id
|
||||
where m.id in (` + placeholders(len(rootIDs)) + `)
|
||||
order by m.created_at asc, m.id asc`
|
||||
contextRows, err := s.db.QueryContext(ctx, query, rootIDs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = contextRows.Close() }()
|
||||
extra, err := scanMessageRows(contextRows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := s.resolveMessageDisplayMentions(ctx, extra); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return mergeMessageRows(rows, extra), nil
|
||||
}
|
||||
|
||||
func scanMessageRows(rows rowScanner) ([]MessageRow, error) {
|
||||
var out []MessageRow
|
||||
for rows.Next() {
|
||||
var row MessageRow
|
||||
var created string
|
||||
var hasAttachments int
|
||||
var pinned int
|
||||
if err := rows.Scan(
|
||||
&row.MessageID,
|
||||
&row.GuildID,
|
||||
&row.GuildName,
|
||||
&row.ChannelID,
|
||||
&row.ChannelName,
|
||||
&row.AuthorID,
|
||||
&row.AuthorName,
|
||||
&row.Content,
|
||||
&created,
|
||||
&row.ReplyToMessage,
|
||||
&row.Source,
|
||||
&hasAttachments,
|
||||
&row.AttachmentNames,
|
||||
&row.AttachmentText,
|
||||
&pinned,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
row.CreatedAt = parseTime(created)
|
||||
row.HasAttachments = hasAttachments == 1
|
||||
row.Pinned = pinned == 1
|
||||
row.DisplayContent = row.Content
|
||||
out = append(out, row)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// rowScanner is the subset of row-cursor behavior that scanMessageRows needs:
// advancing, reading the current row, and reporting the iteration error.
type rowScanner interface {
	// Next advances to the next row and reports whether one is available.
	Next() bool
	// Scan copies the current row's columns into dest.
	Scan(dest ...any) error
	// Err returns the error, if any, encountered during iteration.
	Err() error
}
|
||||
|
||||
func mergeMessageRows(primary, extra []MessageRow) []MessageRow {
|
||||
out := make([]MessageRow, 0, len(primary)+len(extra))
|
||||
seen := map[string]struct{}{}
|
||||
appendRow := func(row MessageRow) {
|
||||
key := row.GuildID + "\x00" + row.ChannelID + "\x00" + row.MessageID
|
||||
if _, ok := seen[key]; ok {
|
||||
return
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
out = append(out, row)
|
||||
}
|
||||
for _, row := range primary {
|
||||
appendRow(row)
|
||||
}
|
||||
for _, row := range extra {
|
||||
appendRow(row)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// normalizeChannelFilter cleans a user-supplied channel filter: surrounding
// whitespace is trimmed, a single leading "#" is dropped, and whitespace left
// after the "#" is trimmed as well.
func normalizeChannelFilter(raw string) string {
	name := strings.TrimSpace(raw)
	name = strings.TrimPrefix(name, "#")
	return strings.TrimSpace(name)
}
|
||||
|
||||
func (s *Store) resolveMessageDisplayMentions(ctx context.Context, rows []MessageRow) error {
|
||||
if len(rows) == 0 {
|
||||
return nil
|
||||
}
|
||||
ids := make([]any, 0, len(rows))
|
||||
indexByID := make(map[string]int, len(rows))
|
||||
for index, row := range rows {
|
||||
id := strings.TrimSpace(row.MessageID)
|
||||
if id == "" {
|
||||
continue
|
||||
}
|
||||
ids = append(ids, id)
|
||||
indexByID[id] = index
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return nil
|
||||
}
|
||||
query := `select message_id, target_type, target_id, target_name from mention_events where message_id in (` + placeholders(len(ids)) + `)`
|
||||
mentionRows, err := s.db.QueryContext(ctx, query, ids...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = mentionRows.Close() }()
|
||||
for mentionRows.Next() {
|
||||
var messageID, targetType, targetID, targetName string
|
||||
if err := mentionRows.Scan(&messageID, &targetType, &targetID, &targetName); err != nil {
|
||||
return err
|
||||
}
|
||||
index, ok := indexByID[messageID]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
rows[index].DisplayContent = replaceDiscordMention(rows[index].DisplayContent, targetType, targetID, targetName)
|
||||
}
|
||||
if err := mentionRows.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.resolveInlineDiscordMentions(ctx, rows)
|
||||
}
|
||||
|
||||
// replaceDiscordMention substitutes every mention token for targetID in
// content with a readable label.
//
// The label is targetName, or targetID when the name is blank. The token
// shape depends on targetType (all inputs are whitespace-trimmed first):
//
//	"role"    "<@&ID>"            -> "@label"
//	"channel" "<#ID>"             -> "#label"
//	other     "<@ID>" and "<@!ID>" -> "@label"
//
// A blank targetID leaves content untouched.
func replaceDiscordMention(content, targetType, targetID, targetName string) string {
	id := strings.TrimSpace(targetID)
	if id == "" {
		return content
	}
	display := strings.TrimSpace(targetName)
	if display == "" {
		display = id
	}

	kind := strings.TrimSpace(targetType)
	if kind == "role" {
		return strings.ReplaceAll(content, "<@&"+id+">", "@"+display)
	}
	if kind == "channel" {
		return strings.ReplaceAll(content, "<#"+id+">", "#"+display)
	}

	// Everything else is treated as a user mention; the plain form is
	// replaced before the "!" form.
	at := "@" + display
	for _, token := range []string{"<@" + id + ">", "<@!" + id + ">"} {
		content = strings.ReplaceAll(content, token, at)
	}
	return content
}
|
||||
|
||||
func (s *Store) resolveInlineDiscordMentions(ctx context.Context, rows []MessageRow) error {
|
||||
userIDs := map[string]struct{}{}
|
||||
channelIDs := map[string]struct{}{}
|
||||
for _, row := range rows {
|
||||
for _, match := range discordUserMentionRE.FindAllStringSubmatch(row.DisplayContent, -1) {
|
||||
if len(match) > 1 && strings.TrimSpace(match[1]) != "" {
|
||||
userIDs[match[1]] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, match := range discordChannelMentionRE.FindAllStringSubmatch(row.DisplayContent, -1) {
|
||||
if len(match) > 1 && strings.TrimSpace(match[1]) != "" {
|
||||
channelIDs[match[1]] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
userNames, err := s.discordMemberDisplayNames(ctx, userIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
channelNames, err := s.discordChannelNames(ctx, channelIDs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for index := range rows {
|
||||
guildID := strings.TrimSpace(rows[index].GuildID)
|
||||
rows[index].DisplayContent = discordUserMentionRE.ReplaceAllStringFunc(rows[index].DisplayContent, func(match string) string {
|
||||
parts := discordUserMentionRE.FindStringSubmatch(match)
|
||||
if len(parts) < 2 {
|
||||
return match
|
||||
}
|
||||
if name := firstResolvedDiscordName(userNames, guildID, parts[1]); name != "" {
|
||||
return "@" + name
|
||||
}
|
||||
return match
|
||||
})
|
||||
rows[index].DisplayContent = discordChannelMentionRE.ReplaceAllStringFunc(rows[index].DisplayContent, func(match string) string {
|
||||
parts := discordChannelMentionRE.FindStringSubmatch(match)
|
||||
if len(parts) < 2 {
|
||||
return match
|
||||
}
|
||||
if name := firstResolvedDiscordName(channelNames, guildID, parts[1]); name != "" {
|
||||
return "#" + name
|
||||
}
|
||||
return match
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Store) discordMemberDisplayNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) {
|
||||
if len(ids) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
args := mapKeysAsAny(ids)
|
||||
query := `
|
||||
select guild_id, user_id,
|
||||
coalesce(
|
||||
nullif(display_name, ''),
|
||||
nullif(nick, ''),
|
||||
nullif(global_name, ''),
|
||||
nullif(username, ''),
|
||||
''
|
||||
)
|
||||
from members
|
||||
where user_id in (` + placeholders(len(args)) + `)
|
||||
`
|
||||
rows, err := s.db.QueryContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = rows.Close() }()
|
||||
out := map[string]string{}
|
||||
for rows.Next() {
|
||||
var guildID, userID, name string
|
||||
if err := rows.Scan(&guildID, &userID, &name); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rememberResolvedDiscordName(out, guildID, userID, name)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (s *Store) discordChannelNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) {
|
||||
if len(ids) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
args := mapKeysAsAny(ids)
|
||||
query := `select guild_id, id, coalesce(nullif(name, ''), '') from channels where id in (` + placeholders(len(args)) + `)`
|
||||
rows, err := s.db.QueryContext(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = rows.Close() }()
|
||||
out := map[string]string{}
|
||||
for rows.Next() {
|
||||
var guildID, channelID, name string
|
||||
if err := rows.Scan(&guildID, &channelID, &name); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rememberResolvedDiscordName(out, guildID, channelID, name)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// mapKeysAsAny returns the keys of values as a []any, ready to be spread into
// a variadic query-argument list. The order follows map iteration and is
// therefore unspecified. The result is never nil, even for an empty set.
func mapKeysAsAny(values map[string]struct{}) []any {
	keys := make([]any, len(values))
	next := 0
	for key := range values {
		keys[next] = key
		next++
	}
	return keys
}
|
||||
|
||||
// rememberResolvedDiscordName records name for id in out under two keys:
//
//   - "guildID|id" when guildID is non-blank; a later call for the same
//     guild and id overwrites the earlier name.
//   - "|id" as a guild-independent fallback; only the first name seen for an
//     id is kept.
//
// All inputs are whitespace-trimmed. Calls with a blank id or blank name are
// ignored. A nil out is also ignored rather than panicking on the write,
// since the lookup helpers in this file hand out nil maps for empty input.
func rememberResolvedDiscordName(out map[string]string, guildID, id, name string) {
	if out == nil {
		// Writing to a nil map panics; there is nowhere to record the name.
		return
	}
	id = strings.TrimSpace(id)
	name = strings.TrimSpace(name)
	if id == "" || name == "" {
		return
	}
	if guildID = strings.TrimSpace(guildID); guildID != "" {
		out[guildID+"|"+id] = name
	}
	// First writer wins for the fallback so it stays stable across guilds.
	if _, ok := out["|"+id]; !ok {
		out["|"+id] = name
	}
}
|
||||
|
||||
// firstResolvedDiscordName looks up the name stored for id in values.
//
// The guild-scoped key "guildID|id" is preferred when guildID is non-blank
// and holds a non-blank name; otherwise the fallback key "|id" is used. All
// inputs and the returned name are whitespace-trimmed. It returns "" for a
// blank id or when nothing is stored. A nil values map is allowed.
func firstResolvedDiscordName(values map[string]string, guildID, id string) string {
	key := strings.TrimSpace(id)
	if key == "" {
		return ""
	}
	guild := strings.TrimSpace(guildID)
	if guild != "" {
		scoped := strings.TrimSpace(values[guild+"|"+key])
		if scoped != "" {
			return scoped
		}
	}
	return strings.TrimSpace(values["|"+key])
}
|
||||
|
||||
@ -5,13 +5,10 @@ import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
_ "modernc.org/sqlite"
|
||||
crawlstore "github.com/vincentkoc/crawlkit/store"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -114,66 +111,33 @@ type ChannelRow struct {
|
||||
}
|
||||
|
||||
func Open(ctx context.Context, path string) (*Store, error) {
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
||||
return nil, fmt.Errorf("mkdir db dir: %w", err)
|
||||
}
|
||||
if err := ensureDBFile(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dsn := fmt.Sprintf(
|
||||
"file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)",
|
||||
path,
|
||||
)
|
||||
db, err := sql.Open("sqlite", dsn)
|
||||
base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open sqlite: %w", err)
|
||||
}
|
||||
// SQLite is single-writer; keep one shared connection so concurrent callers queue
|
||||
// instead of contending on separate writer connections.
|
||||
db.SetMaxOpenConns(1)
|
||||
db.SetMaxIdleConns(1)
|
||||
if err := db.PingContext(ctx); err != nil {
|
||||
_ = db.Close()
|
||||
return nil, fmt.Errorf("ping sqlite: %w", err)
|
||||
}
|
||||
if err := tightenDBFilePerms(path); err != nil {
|
||||
_ = db.Close()
|
||||
return nil, err
|
||||
}
|
||||
db := base.DB()
|
||||
store := &Store{db: db, path: path}
|
||||
if err := store.migrate(ctx); err != nil {
|
||||
_ = db.Close()
|
||||
_ = base.Close()
|
||||
return nil, err
|
||||
}
|
||||
return store, nil
|
||||
}
|
||||
|
||||
func ensureDBFile(path string) error {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
} else if !os.IsNotExist(err) {
|
||||
return fmt.Errorf("stat db file: %w", err)
|
||||
func OpenReadOnly(ctx context.Context, path string) (*Store, error) {
|
||||
base, err := crawlstore.OpenReadOnly(ctx, path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("create db file: %w", err)
|
||||
store := &Store{db: base.DB(), path: path}
|
||||
if version, err := store.schemaVersion(ctx); err != nil {
|
||||
_ = base.Close()
|
||||
return nil, err
|
||||
} else if version != storeSchemaVersion {
|
||||
_ = base.Close()
|
||||
return nil, fmt.Errorf("database schema version mismatch: got %d want %d", version, storeSchemaVersion)
|
||||
}
|
||||
if file != nil {
|
||||
if closeErr := file.Close(); closeErr != nil {
|
||||
return fmt.Errorf("close db file: %w", closeErr)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func tightenDBFilePerms(path string) error {
|
||||
if runtime.GOOS == "windows" {
|
||||
return nil
|
||||
}
|
||||
if err := os.Chmod(path, 0o600); err != nil {
|
||||
return fmt.Errorf("chmod db file: %w", err)
|
||||
}
|
||||
return nil
|
||||
return store, nil
|
||||
}
|
||||
|
||||
func (s *Store) Close() error {
|
||||
|
||||
@ -149,8 +149,9 @@ func TestStoreMaintenanceHelpers(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
require.NoError(t, ensureDBFile(dbPath))
|
||||
require.NoError(t, tightenDBFilePerms(dbPath))
|
||||
info, err := os.Stat(dbPath)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, os.FileMode(0o600), info.Mode().Perm())
|
||||
require.NoError(t, s.RebuildSearchIndexes(ctx))
|
||||
version, err := s.schemaVersion(ctx)
|
||||
require.NoError(t, err)
|
||||
@ -1519,6 +1520,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
|
||||
|
||||
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c2", GuildID: "g1", Kind: "text", Name: "random", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMember(ctx, MemberRecord{
|
||||
GuildID: "g1",
|
||||
UserID: "u1",
|
||||
@ -1625,6 +1627,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
|
||||
require.Len(t, rows, 1)
|
||||
require.Equal(t, "m4", rows[0].MessageID)
|
||||
require.Equal(t, "fallback-user", rows[0].AuthorName)
|
||||
require.Equal(t, "Guild", rows[0].GuildName)
|
||||
require.True(t, rows[0].Pinned)
|
||||
require.True(t, rows[0].HasAttachments)
|
||||
|
||||
@ -1666,6 +1669,49 @@ func TestListMessagesFiltersAndLimit(t *testing.T) {
|
||||
require.Equal(t, "m4", rows[1].MessageID)
|
||||
}
|
||||
|
||||
func TestListMessagesWithThreadContextHydratesReplyRoot(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := context.Background()
|
||||
s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, MessageRecord{
|
||||
ID: "root",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
AuthorID: "u1",
|
||||
MessageType: 0,
|
||||
CreatedAt: "2026-03-01T10:00:00Z",
|
||||
Content: "root message",
|
||||
NormalizedContent: "root message",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, MessageRecord{
|
||||
ID: "reply",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
AuthorID: "u2",
|
||||
MessageType: 0,
|
||||
CreatedAt: "2026-03-02T10:00:00Z",
|
||||
Content: "reply message",
|
||||
NormalizedContent: "reply message",
|
||||
ReplyToMessageID: "root",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
|
||||
rows, err := s.ListMessagesWithThreadContext(ctx, MessageListOptions{Last: 1})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, rows, 2)
|
||||
require.Equal(t, "reply", rows[0].MessageID)
|
||||
require.Equal(t, "root", rows[1].MessageID)
|
||||
}
|
||||
|
||||
func TestNormalizeFTSQueryEdgeCases(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
@ -7,6 +7,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/bwmarrin/discordgo"
|
||||
"github.com/vincentkoc/crawlkit/progress"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
@ -663,6 +665,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou
|
||||
totalChannels := p.totalChannels
|
||||
messages := p.messages
|
||||
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
|
||||
percent := progress.Percent(int64(processed), int64(totalChannels))
|
||||
completion := progress.Completion(int64(processed), int64(totalChannels))
|
||||
p.mu.Unlock()
|
||||
p.syncer.logger.Info(
|
||||
"message sync progress",
|
||||
@ -670,6 +674,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou
|
||||
"processed_channels", processed,
|
||||
"total_channels", totalChannels,
|
||||
"remaining_channels", totalChannels-processed,
|
||||
"percent", percent,
|
||||
"completion", completion,
|
||||
"active_channels", activeChannels,
|
||||
"messages_written", messages,
|
||||
"deferred_channels", deferred,
|
||||
@ -698,6 +704,8 @@ func (p *messageSyncProgress) finish(err error) {
|
||||
totalChannels := p.totalChannels
|
||||
messages := p.messages
|
||||
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
|
||||
percent := progress.Percent(int64(processed), int64(totalChannels))
|
||||
completion := progress.Completion(int64(processed), int64(totalChannels))
|
||||
oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now)
|
||||
p.mu.Unlock()
|
||||
attrs := []any{
|
||||
@ -705,6 +713,8 @@ func (p *messageSyncProgress) finish(err error) {
|
||||
"processed_channels", processed,
|
||||
"total_channels", totalChannels,
|
||||
"remaining_channels", totalChannels - processed,
|
||||
"percent", percent,
|
||||
"completion", completion,
|
||||
"active_channels", activeChannels,
|
||||
"messages_written", messages,
|
||||
"deferred_channels", deferred,
|
||||
@ -766,6 +776,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() {
|
||||
messages := p.messages
|
||||
idleFor := now.Sub(p.lastProgressAt).Round(time.Second).String()
|
||||
elapsed := now.Sub(p.startedAt).Round(time.Second).String()
|
||||
percent := progress.Percent(int64(processed), int64(totalChannels))
|
||||
completion := progress.Completion(int64(processed), int64(totalChannels))
|
||||
oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now)
|
||||
p.mu.Unlock()
|
||||
p.syncer.logger.Info(
|
||||
@ -774,6 +786,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() {
|
||||
"processed_channels", processed,
|
||||
"total_channels", totalChannels,
|
||||
"remaining_channels", totalChannels-processed,
|
||||
"percent", percent,
|
||||
"completion", completion,
|
||||
"active_channels", activeChannels,
|
||||
"messages_written", messages,
|
||||
"deferred_channels", deferred,
|
||||
|
||||
@ -73,6 +73,8 @@ func TestMessageSyncProgressFinishReportsSummaryCounts(t *testing.T) {
|
||||
logs := out.String()
|
||||
require.Contains(t, logs, `msg="message sync finished"`)
|
||||
require.Contains(t, logs, `processed_channels=3`)
|
||||
require.Contains(t, logs, `percent=100.0`)
|
||||
require.Contains(t, logs, `completion=100.0%`)
|
||||
require.Contains(t, logs, `messages_written=42`)
|
||||
require.Contains(t, logs, `skipped_missing_access_channels=1`)
|
||||
require.Contains(t, logs, `skipped_unknown_channel_channels=1`)
|
||||
@ -105,4 +107,6 @@ func TestMessageSyncProgressReportsWaitingHeartbeat(t *testing.T) {
|
||||
require.Contains(t, logs, `oldest_active_channel_id=c1`)
|
||||
require.Contains(t, logs, `oldest_active_channel_name=slowpoke`)
|
||||
require.Contains(t, logs, `active_channels=1`)
|
||||
require.Contains(t, logs, `percent=0.0`)
|
||||
require.Contains(t, logs, `completion=0.0%`)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user