diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57a67d0..df9585a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -91,7 +91,19 @@ jobs: }' - name: Build - run: go build ./cmd/discrawl + run: go build -o bin/discrawl ./cmd/discrawl + + - name: Smoke test CLI control surface + run: | + set -euo pipefail + output="$(./bin/discrawl help)" + printf '%s\n' "$output" + printf '%s' "$output" | grep -q "metadata" + printf '%s' "$output" | grep -q "tui" + test -n "$(./bin/discrawl --version)" + ./bin/discrawl metadata --json | grep -q '"schema_version"' + ./bin/discrawl status --json | grep -q '"databases"' + ./bin/discrawl tui --json | grep -q '^\[' deps: runs-on: ubuntu-latest diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 9929d7b..5da4635 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -12,7 +12,7 @@ builds: env: - CGO_ENABLED=0 ldflags: - - -s -w -X github.com/steipete/discrawl/internal/cli.version={{ .Version }} + - -s -w -X github.com/openclaw/discrawl/internal/cli.version={{ .Version }} targets: - darwin_amd64 - darwin_arm64 diff --git a/CHANGELOG.md b/CHANGELOG.md index ced5c0d..9340a48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,10 +26,24 @@ - Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec. - Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector. +### Fixes + +- Label direct-message TUI panes as direct messages instead of raw `@me` guild rows, keeping DM channel/person context readable. +- Inherit shared crawlkit TUI improvements for newest-first startup, count-header sorting, selected-message-first chat detail panes, and gitcrawl-style metadata labels. +- Surface Discord attachment filenames and extracted text in TUI detail panes instead of only showing `attachments=true`. + ## 0.6.3 - 2026-05-01 +### Changes + +- Add crawlkit control metadata/status surfaces with `metadata --json`, `status --json`, and `doctor --json`. +- Add `tap` and `cache-import` as public desktop-cache import names while keeping `wiretap` as a documented legacy alias. +- Add `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared `crawlkit/tui` package. +- Render TUI rows with compact panes and expose pinned, attachment, reply, channel, and author metadata in the detail pane. + ### Fixes +- Keep status and TUI reads safe for fresh or missing local databases without triggering git-share auto-update. - Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17) - Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn. diff --git a/README.md b/README.md index 2738808..85fae20 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror. - tails Gateway events for live updates, with periodic repair syncs - imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me` - publishes and imports private Git-backed archive snapshots for org-wide read access +- browses stored messages and local DMs in a terminal archive UI - supports Git-only read mode with no Discord credentials on reader machines - generates backup README activity reports, with optional AI-written field notes - exposes read-only SQL for ad hoc analysis @@ -159,6 +160,17 @@ discrawl messages --channel general --hours 24 ## Commands +### `tui` + +Opens the local terminal archive browser for stored messages. + +```bash +discrawl tui +discrawl tui --guild 123456789012345678 --channel general +discrawl tui --dm +discrawl --json tui --limit 50 +``` + ### `init` Creates the local config and discovers accessible guilds. @@ -683,6 +695,7 @@ go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run go test ./... -coverprofile=/tmp/discrawl.cover go tool cover -func=/tmp/discrawl.cover | tail -n 1 go build ./cmd/discrawl +go run ./cmd/discrawl help | grep tui ``` Target coverage is `>= 85%`. diff --git a/go.mod b/go.mod index fdebb09..0c7355e 100644 --- a/go.mod +++ b/go.mod @@ -5,26 +5,50 @@ go 1.26.2 require ( github.com/bwmarrin/discordgo v0.29.0 github.com/gorilla/websocket v1.5.3 - github.com/pelletier/go-toml/v2 v2.3.1 github.com/stretchr/testify v1.11.1 github.com/zalando/go-keyring v0.2.8 golang.org/x/sys v0.43.0 golang.org/x/text v0.36.0 - modernc.org/sqlite v1.50.0 ) require ( + github.com/charmbracelet/bubbles v1.0.0 // indirect + github.com/clipperhouse/displaywidth v0.9.0 // indirect + github.com/clipperhouse/stringish v0.1.1 // indirect + github.com/clipperhouse/uax29/v2 v2.5.0 // indirect + github.com/pelletier/go-toml/v2 v2.3.1 // indirect + modernc.org/sqlite v1.50.0 // indirect +) + +require ( + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/charmbracelet/bubbletea v1.3.10 // indirect + github.com/charmbracelet/colorprofile v0.4.1 // indirect + github.com/charmbracelet/lipgloss v1.1.0 // indirect + github.com/charmbracelet/x/ansi v0.11.6 // indirect + github.com/charmbracelet/x/cellbuf v0.0.15 // indirect + github.com/charmbracelet/x/term v0.2.2 // indirect github.com/danieljoos/wincred v1.2.3 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect + github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/godbus/dbus/v5 v5.2.2 // indirect github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect github.com/google/uuid v1.6.0 // indirect github.com/kr/pretty v0.3.1 // indirect + github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-isatty v0.0.22 // indirect + github.com/mattn/go-localereader v0.0.1 // indirect + github.com/mattn/go-runewidth v0.0.19 // indirect + github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect + github.com/muesli/cancelreader v0.2.2 // indirect + github.com/muesli/termenv v0.16.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/vincentkoc/crawlkit v0.4.0 + github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect golang.org/x/crypto v0.50.0 // indirect golang.org/x/tools v0.44.0 // indirect gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/go.sum b/go.sum index bceab2a..b98a01f 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,27 @@ +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno= github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY= +github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc= +github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E= +github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= +github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= +github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk= +github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk= +github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= +github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= +github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8= +github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ= +github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI= +github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q= +github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk= +github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= +github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA= +github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA= +github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= +github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= +github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U= +github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ= github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs= @@ -7,6 +29,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 h1:EwtI+Al+DeppwYX2oXJCETMO23COyaKGP6fHVpkpWpg= @@ -25,8 +49,20 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= +github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4= github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= +github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= +github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= +github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= +github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= +github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= +github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= +github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= +github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc= @@ -36,23 +72,32 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/vincentkoc/crawlkit v0.4.0 h1:1jQZAYbBivy6d7ewNdMZ8THgmJVwb+pQT0kH5Z9COHI= +github.com/vincentkoc/crawlkit v0.4.0/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs= github.com/zalando/go-keyring v0.2.8/go.mod h1:tsMo+VpRq5NGyKfxoBVjCuMrG47yj8cmakZDO5QGii0= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/internal/cli/admin_commands.go b/internal/cli/admin_commands.go index 5ccbe9b..e6b0871 100644 --- a/internal/cli/admin_commands.go +++ b/internal/cli/admin_commands.go @@ -17,6 +17,7 @@ import ( "github.com/openclaw/discrawl/internal/discord" "github.com/openclaw/discrawl/internal/discorddesktop" "github.com/openclaw/discrawl/internal/embed" + "github.com/openclaw/discrawl/internal/share" "github.com/openclaw/discrawl/internal/store" "github.com/openclaw/discrawl/internal/syncer" ) @@ -314,16 +315,37 @@ func (r *runtime) runWiretap(args []string) error { } func (r *runtime) runStatus(args []string) error { - if len(args) != 0 { + fs := flag.NewFlagSet("status", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { return usageErr(errors.New("status takes no arguments")) } + if *jsonOut { + r.json = true + } dbPath, err := config.ExpandPath(r.cfg.DBPath) if err != nil { return configErr(err) } - status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID()) - if err != nil { - return err + status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()} + if r.store != nil { + status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID()) + if err != nil { + return err + } + } + if r.json { + needsUpdate := false + if r.store != nil && r.cfg.ShareEnabled() { + if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil { + needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter) + } + } + return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate)) } return r.print(status) } @@ -384,9 +406,18 @@ func (r *runtime) runEmbed(args []string) error { } func (r *runtime) runDoctor(args []string) error { - if len(args) != 0 { + fs := flag.NewFlagSet("doctor", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { return usageErr(errors.New("doctor takes no arguments")) } + if *jsonOut { + r.json = true + } report := map[string]any{ "config_path": r.configPath, } diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 0556ce6..76f4614 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -47,6 +47,10 @@ func ExitCode(err error) int { } func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error { + if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" { + printUsage(stdout) + return nil + } global := flag.NewFlagSet("discrawl", flag.ContinueOnError) global.SetOutput(io.Discard) configPath := global.String("config", "", "") @@ -66,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error { return nil } rest := global.Args() - if len(rest) == 0 || rest[0] == "help" { + if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" { printUsage(stdout) return nil } + if rest[0] == "version" { + _, _ = io.WriteString(stdout, version+"\n") + return nil + } level := slog.LevelInfo if *quiet { level = slog.LevelError @@ -129,6 +137,8 @@ type attachmentTextConfigurer interface { func (r *runtime) dispatch(rest []string) error { switch rest[0] { + case "metadata": + return r.runMetadata(rest[1:]) case "init": return r.runInit(rest[1:]) case "sync": @@ -141,9 +151,16 @@ func (r *runtime) dispatch(rest []string) error { return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) }) case "wiretap": return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) }) + case "tap", "cache-import": + return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) }) case "search": autoShareUpdate := !hasBoolFlag(rest[1:], "--dm") return r.withLocalStoreDefaultLocked(autoShareUpdate, autoShareUpdate, func() error { return r.runSearch(rest[1:]) }) + case "tui": + if hasHelpArg(rest[1:]) { + return r.runTUI(rest[1:]) + } + return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) }) case "messages": if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") { return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) }) @@ -167,7 +184,7 @@ func (r *runtime) dispatch(rest []string) error { case "channels": return r.withLocalStoreLocked(true, func() error { return r.runChannels(rest[1:]) }) case "status": - return r.withLocalStoreLocked(true, func() error { return r.runStatus(rest[1:]) }) + return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) }) case "report": return r.withLocalStoreLocked(true, func() error { return r.runReport(rest[1:]) }) case "publish": @@ -249,6 +266,35 @@ func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn f return fn() } +func (r *runtime) withLocalStoreReadOnly(fn func() error) error { + cfg, err := config.Load(r.configPath) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + return configErr(err) + } + cfg = config.Default() + if err := cfg.Normalize(); err != nil { + return configErr(err) + } + } + dbPath, err := config.ExpandPath(cfg.DBPath) + if err != nil { + return configErr(err) + } + r.cfg = cfg + var openErr error + r.store, openErr = store.OpenReadOnly(r.ctx, dbPath) + if openErr != nil { + if errors.Is(openErr, os.ErrNotExist) { + r.store = nil + return fn() + } + return dbErr(openErr) + } + defer func() { _ = r.store.Close() }() + return fn() +} + func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error { return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn) } diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index d7e4863..76b2a9f 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -76,6 +76,21 @@ func TestStatusSearchSQLAndListings(t *testing.T) { NormalizedContent: "panic locked database", RawJSON: `{}`, })) + require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: "g2", Name: "Other Guild", RawJSON: `{}`})) + require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "random", RawJSON: `{}`})) + require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{ + ID: "m-other", + GuildID: "g2", + ChannelID: "c2", + ChannelName: "random", + AuthorID: "u2", + AuthorName: "Outside", + MessageType: 0, + CreatedAt: time.Now().UTC().Add(-time.Hour).Format(time.RFC3339Nano), + Content: "outside default guild", + NormalizedContent: "outside default guild", + RawJSON: `{}`, + })) require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{ ID: "m2", GuildID: "g1", @@ -137,6 +152,35 @@ func TestStatusSearchSQLAndListings(t *testing.T) { require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{})) require.NotEmpty(t, out.String()) } + + before, err := os.ReadFile(dbPath) + require.NoError(t, err) + var out bytes.Buffer + require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "tui", "--limit", "5"}, &out, &bytes.Buffer{})) + var rows []map[string]any + require.NoError(t, json.Unmarshal(out.Bytes(), &rows)) + require.NotEmpty(t, rows) + require.Equal(t, "panic locked database", rows[0]["title"]) + require.Equal(t, "discord", rows[0]["source"]) + require.Equal(t, "message", rows[0]["kind"]) + require.Equal(t, "Guild", rows[0]["scope"]) + require.Equal(t, "general", rows[0]["container"]) + require.Equal(t, "https://discord.com/channels/g1/c1/m1", rows[0]["url"]) + after, err := os.ReadFile(dbPath) + require.NoError(t, err) + require.Equal(t, before, after, "tui --json should not mutate the database") +} + +func TestTUIHelpReturnsUsage(t *testing.T) { + var stdout bytes.Buffer + var stderr bytes.Buffer + + require.NoError(t, Run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr)) + require.Contains(t, stdout.String(), "Usage of tui:") + require.Contains(t, stdout.String(), "-limit") + require.Contains(t, stdout.String(), "right-click") + require.Contains(t, stdout.String(), "# jump") + require.Empty(t, stderr.String()) } func TestWiretapImportsDesktopDirectMessages(t *testing.T) { @@ -183,6 +227,53 @@ func TestWiretapImportsDesktopDirectMessages(t *testing.T) { require.Contains(t, out.String(), "secret DM launch plan") } +func TestDiscordTUIRowsIncludePaneMetadata(t *testing.T) { + rows := discordTUIRows([]store.MessageRow{{ + MessageID: "m1", + GuildID: "@me", + GuildName: "Discord Direct Messages", + ChannelID: "c1", + ChannelName: "Vincent K", + AuthorID: "u1", + AuthorName: "Peter", + Content: "hello from desktop", + DisplayContent: "hello from Vincent", + CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC), + ReplyToMessage: "m0", + HasAttachments: true, + AttachmentNames: "trace.txt", + AttachmentText: "stack trace line one", + Pinned: true, + }}) + require.Len(t, rows, 1) + require.Equal(t, "hello from Vincent", rows[0].Title) + require.Contains(t, rows[0].Detail, "hello from Vincent") + require.Contains(t, rows[0].Detail, "Attachments") + require.Contains(t, rows[0].Detail, "stack trace line one") + require.Equal(t, "hello from Vincent", rows[0].Text) + require.Equal(t, "Direct messages", rows[0].Scope) + require.Equal(t, "Vincent K", rows[0].Container) + require.Contains(t, rows[0].Tags, "dm") + require.Equal(t, "true", rows[0].Fields["attachments"]) + require.Equal(t, "trace.txt", rows[0].Fields["attachment_names"]) + require.Equal(t, "true", rows[0].Fields["pinned"]) + require.Equal(t, "m0", rows[0].Fields["reply_to"]) + require.Equal(t, "@me", rows[0].Fields["guild_id"]) + + rows = discordTUIRows([]store.MessageRow{{ + MessageID: "m2", + GuildID: "g1", + ChannelID: "c2", + AuthorID: "439223656200273932", + Content: "desktop-only author", + CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC), + Source: "discord_desktop", + }}) + require.Equal(t, "user:439223...3932", rows[0].Author) + require.Equal(t, "DM c2", discordContainerLabel(store.MessageRow{GuildID: "@me", ChannelID: "c2"})) + require.Contains(t, rows[0].Tags, "discord_desktop") +} + func TestParseMessageWindow(t *testing.T) { rt := &runtime{now: func() time.Time { return time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC) diff --git a/internal/cli/control_commands.go b/internal/cli/control_commands.go new file mode 100644 index 0000000..681bd0f --- /dev/null +++ b/internal/cli/control_commands.go @@ -0,0 +1,96 @@ +package cli + +import ( + "errors" + "flag" + "fmt" + "io" + "os" + "time" + + "github.com/openclaw/discrawl/internal/config" + "github.com/openclaw/discrawl/internal/store" + "github.com/vincentkoc/crawlkit/control" +) + +func (r *runtime) runMetadata(args []string) error { + fs := flag.NewFlagSet("metadata", flag.ContinueOnError) + fs.SetOutput(io.Discard) + jsonOut := fs.Bool("json", false, "") + if err := fs.Parse(args); err != nil { + return usageErr(err) + } + if fs.NArg() != 0 { + return usageErr(errors.New("metadata takes flags only")) + } + if *jsonOut { + r.json = true + } + cfg := config.Default() + manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl") + manifest.Description = "Local-first Discord archive crawler." + manifest.Branding = control.Branding{SymbolName: "bubble.left.and.bubble.right.fill", AccentColor: "#5865f2", BundleIdentifier: "com.hnc.Discord"} + manifest.Paths = control.Paths{ + DefaultConfig: config.ResolvePath(""), + ConfigEnv: config.DefaultConfigEnv, + DefaultDatabase: cfg.DBPath, + DefaultCache: cfg.CacheDir, + DefaultLogs: cfg.LogDir, + DefaultShare: cfg.Share.RepoPath, + } + manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"} + manifest.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"discord", "desktop-cache", "sqlite", "git-share"}} + manifest.Commands = map[string]control.Command{ + "status": {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true}, + "doctor": {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true}, + "sync": {Title: "Sync", Argv: []string{"discrawl", "--json", "sync"}, JSON: true, Mutates: true}, + "tap": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "tap"}, JSON: true, Mutates: true}, + "cache-import": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "cache-import"}, JSON: true, Mutates: true}, + "wiretap": {Title: "Legacy desktop cache import", Argv: []string{"discrawl", "--json", "wiretap"}, JSON: true, Mutates: true, Legacy: true, Deprecated: true}, + "tui": {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}}, + "tui-json": {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true}, + "publish": {Title: "Publish share", Argv: []string{"discrawl", "--json", "publish"}, JSON: true, Mutates: true}, + "subscribe": {Title: "Subscribe share", Argv: []string{"discrawl", "--json", "subscribe"}, JSON: true, Mutates: true}, + "update": {Title: "Update share", Argv: []string{"discrawl", "--json", "update"}, JSON: true, Mutates: true}, + } + return r.print(manifest) +} + +func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status { + counts := []control.Count{ + control.NewCount("guilds", "Guilds", int64(status.GuildCount)), + control.NewCount("channels", "Channels", int64(status.ChannelCount)), + control.NewCount("threads", "Threads", int64(status.ThreadCount)), + control.NewCount("messages", "Messages", int64(status.MessageCount)), + control.NewCount("members", "Members", int64(status.MemberCount)), + control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)), + } + out := control.NewStatus("discrawl", fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount)) + out.State = "current" + out.ConfigPath = configPath + out.DatabasePath = status.DBPath + out.Counts = counts + if !status.LastSyncAt.IsZero() { + out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339) + } + db := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, counts) + out.DatabaseBytes = db.Bytes + out.WALBytes = fileSize(status.DBPath + "-wal") + out.Databases = []control.Database{db} + out.Share = &control.Share{ + Enabled: cfg.ShareEnabled(), + RepoPath: cfg.Share.RepoPath, + Remote: cfg.Share.Remote, + Branch: cfg.Share.Branch, + NeedsUpdate: shareNeedsUpdate, + } + return out +} + +func fileSize(path string) int64 { + info, err := os.Stat(path) + if err != nil { + return 0 + } + return info.Size() +} diff --git a/internal/cli/output.go b/internal/cli/output.go index 46a3633..5aa3a99 100644 --- a/internal/cli/output.go +++ b/internal/cli/output.go @@ -100,11 +100,16 @@ Usage: discrawl [global flags] [args] Commands: + metadata + version init sync tail + tap + cache-import wiretap search + tui messages digest analytics diff --git a/internal/cli/query_sync.go b/internal/cli/query_sync.go index 8e01158..87f46ec 100644 --- a/internal/cli/query_sync.go +++ b/internal/cli/query_sync.go @@ -96,3 +96,12 @@ func hasBoolFlag(args []string, name string) bool { } return false } + +func hasHelpArg(args []string) bool { + for _, arg := range args { + if arg == "help" || arg == "--help" || arg == "-h" { + return true + } + } + return false +} diff --git a/internal/cli/tui_commands.go b/internal/cli/tui_commands.go new file mode 100644 index 0000000..4487661 --- /dev/null +++ b/internal/cli/tui_commands.go @@ -0,0 +1,239 @@ +package cli + +import ( + "context" + "errors" + "flag" + "fmt" + "strings" + + "github.com/vincentkoc/crawlkit/tui" + + "github.com/openclaw/discrawl/internal/store" +) + +func (r *runtime) runTUI(args []string) error { + fs := flag.NewFlagSet("tui", flag.ContinueOnError) + fs.SetOutput(r.stderr) + fs.Usage = func() { + _, _ = fmt.Fprintln(fs.Output(), "Usage of tui:") + fs.PrintDefaults() + _, _ = fmt.Fprintln(fs.Output()) + _, _ = fmt.Fprintln(fs.Output(), tui.ControlsHelp()) + } + if hasHelpArg(args) { + fs.SetOutput(r.stdout) + } + channel := fs.String("channel", "", "channel id") + author := fs.String("author", "", "author/user id") + limit := fs.Int("limit", 200, "row limit") + includeEmpty := fs.Bool("include-empty", false, "include empty messages") + dm := fs.Bool("dm", false, "browse direct messages") + guildsFlag := fs.String("guilds", "", "comma-separated guild ids") + guildFlag := fs.String("guild", "", "guild id") + jsonOut := fs.Bool("json", false, "write browser rows as JSON") + if len(args) == 1 && args[0] == "help" { + fs.Usage() + return nil + } + if err := fs.Parse(args); err != nil { + if errors.Is(err, flag.ErrHelp) { + return nil + } + return usageErr(err) + } + if *jsonOut { + r.json = true + } + if fs.NArg() != 0 { + return usageErr(errors.New("tui takes flags only")) + } + if *limit <= 0 { + return usageErr(errors.New("tui --limit must be positive")) + } + guildIDs, err := r.resolveTUIGuilds(*dm, *guildFlag, *guildsFlag) + if err != nil { + return usageErr(err) + } + if r.store == nil { + return tui.Browse(r.ctx, tui.BrowseOptions{ + AppName: "discrawl", + Title: "discrawl archive", + EmptyMessage: "discrawl has no local messages yet", + JSON: r.json, + Layout: tui.LayoutChat, + SourceKind: r.archiveSourceKind(), + SourceLocation: r.archiveSourceLocation(), + Stdout: r.stdout, + }) + } + loadRows := func() ([]tui.Row, error) { + rows, err := r.store.ListMessagesWithThreadContext(r.ctx, store.MessageListOptions{ + GuildIDs: guildIDs, + Channel: *channel, + Author: *author, + Last: *limit, + IncludeEmpty: *includeEmpty, + }) + if err != nil { + return nil, err + } + return discordTUIRows(rows), nil + } + archiveRows, err := loadRows() + if err != nil { + return err + } + return tui.Browse(r.ctx, tui.BrowseOptions{ + AppName: "discrawl", + Title: "discrawl archive", + EmptyMessage: "discrawl has no local messages yet", + Rows: archiveRows, + Refresh: func(context.Context) ([]tui.Row, error) { return loadRows() }, + JSON: r.json, + Layout: tui.LayoutChat, + SourceKind: r.archiveSourceKind(), + SourceLocation: r.archiveSourceLocation(), + Stdout: r.stdout, + }) +} + +func (r *runtime) resolveTUIGuilds(dm bool, guild, guilds string) ([]string, error) { + guildIDs, err := directMessageGuildScope(dm, guild, guilds) + if err != nil || dm || len(guildIDs) > 0 { + return guildIDs, err + } + if defaultGuild := r.cfg.EffectiveDefaultGuildID(); defaultGuild != "" { + return []string{defaultGuild}, nil + } + return nil, nil +} + +func (r *runtime) archiveSourceKind() string { + if strings.TrimSpace(r.cfg.Share.Remote) != "" { + return tui.SourceRemote + } + return tui.SourceLocal +} + +func (r *runtime) archiveSourceLocation() string { + if strings.TrimSpace(r.cfg.Share.Remote) != "" { + return r.cfg.Share.Remote + } + return r.cfg.DBPath +} + +func discordTUIRows(rows []store.MessageRow) []tui.Row { + items := make([]tui.Row, 0, len(rows)) + for _, row := range rows { + content := discordDisplayContent(row) + title := strings.TrimSpace(content) + detail := discordDetailContent(row, content) + if title == "" { + title = firstNonEmpty(strings.TrimSpace(row.AttachmentText), row.MessageID) + } + tags := []string{row.GuildID, row.ChannelID} + if row.GuildID == "@me" { + tags = append(tags, "dm") + } + if row.Source != "" { + tags = append(tags, row.Source) + } + items = append(items, tui.Row{ + Source: "discord", + Kind: "message", + ID: row.MessageID, + ParentID: row.ReplyToMessage, + Scope: discordScopeLabel(row), + Container: discordContainerLabel(row), + Author: discordAuthorLabel(row), + Title: title, + Text: content, + Detail: detail, + URL: discordMessageURL(row), + CreatedAt: formatTime(row.CreatedAt), + Tags: tags, + Fields: map[string]string{ + "attachment_names": row.AttachmentNames, + "attachments": boolString(row.HasAttachments), + "author_id": row.AuthorID, + "channel_id": row.ChannelID, + "guild_id": row.GuildID, + "pinned": boolString(row.Pinned), + "reply_to": row.ReplyToMessage, + "source": row.Source, + }, + }) + } + return items +} + +func discordDetailContent(row store.MessageRow, content string) string { + var parts []string + if strings.TrimSpace(content) != "" { + parts = append(parts, strings.TrimSpace(content)) + } + if strings.TrimSpace(row.AttachmentText) != "" { + parts = append(parts, "Attachments\n"+strings.TrimSpace(row.AttachmentText)) + } + if len(parts) == 0 { + return "" + } + return strings.Join(parts, "\n\n") +} + +func discordDisplayContent(row store.MessageRow) string { + if content := strings.TrimSpace(row.DisplayContent); content != "" { + return content + } + return row.Content +} + +func discordMessageURL(row store.MessageRow) string { + guildID := strings.TrimSpace(row.GuildID) + channelID := strings.TrimSpace(row.ChannelID) + messageID := strings.TrimSpace(row.MessageID) + if guildID == "" || channelID == "" || messageID == "" { + return "" + } + return "https://discord.com/channels/" + guildID + "/" + channelID + "/" + messageID +} + +func discordScopeLabel(row store.MessageRow) string { + if row.GuildID == "@me" { + return "Direct messages" + } + return firstNonEmpty(row.GuildName, row.GuildID) +} + +func discordContainerLabel(row store.MessageRow) string { + if row.GuildID == "@me" { + return firstNonEmpty(row.ChannelName, "DM "+compactDiscordID(row.ChannelID)) + } + return firstNonEmpty(row.ChannelName, row.ChannelID) +} + +func discordAuthorLabel(row store.MessageRow) string { + if name := strings.TrimSpace(row.AuthorName); name != "" { + return name + } + if id := strings.TrimSpace(row.AuthorID); id != "" { + return "user:" + compactDiscordID(id) + } + return "" +} + +func compactDiscordID(id string) string { + id = strings.TrimSpace(id) + if len(id) <= 10 { + return id + } + return id[:6] + "..." + id[len(id)-4:] +} + +func boolString(value bool) string { + if value { + return "true" + } + return "" +} diff --git a/internal/config/config.go b/internal/config/config.go index 8d767f1..0eb33d7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,7 +9,7 @@ import ( "strings" "time" - "github.com/pelletier/go-toml/v2" + crawlconfig "github.com/vincentkoc/crawlkit/config" ) const ( @@ -85,14 +85,25 @@ type TokenResolution struct { Path string } +var appConfig = crawlconfig.App{Name: "discrawl", ConfigEnv: DefaultConfigEnv, BaseDir: "~/.discrawl", LegacyBaseDir: "~/.discrawl"} + func Default() Config { home, _ := os.UserHomeDir() - base := filepath.Join(home, ".discrawl") + paths, err := appConfig.DefaultPaths() + if err != nil { + base := filepath.Join(home, ".discrawl") + paths = crawlconfig.Paths{ + DBPath: filepath.Join(base, "discrawl.db"), + CacheDir: filepath.Join(base, "cache"), + LogDir: filepath.Join(base, "logs"), + ShareDir: filepath.Join(base, "share"), + } + } return Config{ Version: 1, - DBPath: filepath.Join(base, "discrawl.db"), - CacheDir: filepath.Join(base, "cache"), - LogDir: filepath.Join(base, "logs"), + DBPath: paths.DBPath, + CacheDir: paths.CacheDir, + LogDir: paths.LogDir, DefaultGuildID: "", Discord: DiscordConfig{ TokenSource: "env", @@ -124,7 +135,7 @@ func Default() Config { }, }, Share: ShareConfig{ - RepoPath: filepath.Join(base, "share"), + RepoPath: paths.ShareDir, Branch: "main", AutoUpdate: true, StaleAfter: "15m", @@ -145,14 +156,12 @@ func defaultSyncConcurrency() int { } func ResolvePath(flagPath string) string { - if strings.TrimSpace(flagPath) != "" { - return flagPath + path, err := appConfig.ResolveConfigPath(flagPath) + if err != nil { + home, _ := os.UserHomeDir() + return filepath.Join(home, ".discrawl", "config.toml") } - if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" { - return envPath - } - home, _ := os.UserHomeDir() - return filepath.Join(home, ".discrawl", "config.toml") + return path } func Load(path string) (Config, error) { @@ -161,13 +170,9 @@ func Load(path string) (Config, error) { if err != nil { return Config{}, err } - data, err := os.ReadFile(expanded) - if err != nil { + if err := crawlconfig.LoadTOML(expanded, &cfg); err != nil { return Config{}, err } - if err := toml.Unmarshal(data, &cfg); err != nil { - return Config{}, fmt.Errorf("parse config: %w", err) - } if err := cfg.Normalize(); err != nil { return Config{}, err } @@ -182,14 +187,7 @@ func Write(path string, cfg Config) error { if err != nil { return err } - if err := os.MkdirAll(filepath.Dir(expanded), 0o755); err != nil { - return fmt.Errorf("mkdir config dir: %w", err) - } - data, err := toml.Marshal(cfg) - if err != nil { - return fmt.Errorf("marshal config: %w", err) - } - return os.WriteFile(expanded, data, 0o600) + return crawlconfig.WriteTOML(expanded, cfg, 0o600) } func (c *Config) Normalize() error { @@ -343,35 +341,18 @@ func (c Config) ShareEnabled() bool { } func EnsureRuntimeDirs(cfg Config) error { - paths := []string{cfg.CacheDir, cfg.LogDir, filepath.Dir(cfg.DBPath)} - for _, path := range paths { - expanded, err := ExpandPath(path) - if err != nil { - return err - } - if err := os.MkdirAll(expanded, 0o755); err != nil { - return fmt.Errorf("mkdir %s: %w", expanded, err) - } - } - return nil + return crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{ + DBPath: cfg.DBPath, + CacheDir: cfg.CacheDir, + LogDir: cfg.LogDir, + }) } func ExpandPath(path string) (string, error) { if strings.TrimSpace(path) == "" { return "", errors.New("empty path") } - if strings.HasPrefix(path, "~/") || path == "~" { - home, err := os.UserHomeDir() - if err != nil { - return "", fmt.Errorf("home dir: %w", err) - } - if path == "~" { - path = home - } else { - path = filepath.Join(home, strings.TrimPrefix(path, "~/")) - } - } - return filepath.Clean(os.ExpandEnv(path)), nil + return filepath.Clean(os.ExpandEnv(crawlconfig.ExpandHome(path))), nil } func uniqueStrings(in []string) []string { diff --git a/internal/share/share.go b/internal/share/share.go index dd2bccc..4466e9c 100644 --- a/internal/share/share.go +++ b/internal/share/share.go @@ -12,12 +12,13 @@ import ( "os" "os/exec" "path/filepath" - "slices" "strconv" "strings" "time" "github.com/openclaw/discrawl/internal/store" + "github.com/vincentkoc/crawlkit/mirror" + "github.com/vincentkoc/crawlkit/snapshot" ) const ( @@ -27,7 +28,7 @@ const ( directMessageGuildID = "@me" ) -var ErrNoManifest = errors.New("share manifest not found") +var ErrNoManifest = snapshot.ErrNoManifest const shardFlushRows = 1024 @@ -73,13 +74,7 @@ type Manifest struct { Files map[string]string `json:"files,omitempty"` } -type TableManifest struct { - Name string `json:"name"` - File string `json:"file,omitempty"` - Files []string `json:"files,omitempty"` - Columns []string `json:"columns"` - Rows int `json:"rows"` -} +type TableManifest = snapshot.TableManifest type EmbeddingManifest struct { Provider string `json:"provider"` @@ -94,120 +89,52 @@ func EnsureRepo(ctx context.Context, opts Options) error { if strings.TrimSpace(opts.RepoPath) == "" { return errors.New("share repo path is empty") } - if _, err := os.Stat(filepath.Join(opts.RepoPath, ".git")); err == nil { - return nil - } - if strings.TrimSpace(opts.Remote) != "" { - if err := os.MkdirAll(filepath.Dir(opts.RepoPath), 0o755); err != nil { - return fmt.Errorf("mkdir share parent: %w", err) - } - if err := run(ctx, "", "git", "clone", opts.Remote, opts.RepoPath); err != nil { - return err - } - if strings.TrimSpace(opts.Branch) != "" { - if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil { - return err - } - } - return nil - } - if err := os.MkdirAll(opts.RepoPath, 0o755); err != nil { - return fmt.Errorf("mkdir share repo: %w", err) - } - if err := run(ctx, opts.RepoPath, "git", "init"); err != nil { - return err - } - if strings.TrimSpace(opts.Branch) != "" { - if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", opts.Branch); err != nil { - return err - } - } - return nil + return mirror.EnsureRepo(ctx, mirrorOptions(opts)) } func Pull(ctx context.Context, opts Options) error { - if strings.TrimSpace(opts.Remote) == "" { + if strings.TrimSpace(opts.Remote) == "" && strings.TrimSpace(opts.RepoPath) == "" { return nil } - if err := EnsureRepo(ctx, opts); err != nil { - return err - } - if err := run(ctx, opts.RepoPath, "git", "fetch", "--prune", "origin"); err != nil { - return err - } - branch := opts.Branch - if strings.TrimSpace(branch) == "" { - branch = "main" - } - remoteRef := "refs/remotes/origin/" + branch - if _, err := output(ctx, opts.RepoPath, "git", "rev-parse", "--verify", remoteRef); err != nil { - return run(ctx, opts.RepoPath, "git", "checkout", "-B", branch) - } - if err := run(ctx, opts.RepoPath, "git", "checkout", "-B", branch, "origin/"+branch); err != nil { - return err - } - return run(ctx, opts.RepoPath, "git", "pull", "--ff-only", "origin", branch) + return mirror.Pull(ctx, mirrorOptions(opts)) } func Commit(ctx context.Context, opts Options, message string) (bool, error) { - if err := run(ctx, opts.RepoPath, "git", "add", "."); err != nil { - return false, err - } - out, err := output(ctx, opts.RepoPath, "git", "status", "--porcelain") - if err != nil { - return false, err - } - if strings.TrimSpace(out) == "" { - return false, nil - } - if strings.TrimSpace(message) == "" { - message = "sync: discord archive" - } - if err := run(ctx, opts.RepoPath, "git", "commit", "-m", message); err != nil { - return false, err - } - return true, nil + return mirror.Commit(ctx, mirrorOptions(opts), message) } func Push(ctx context.Context, opts Options) error { - branch := opts.Branch - if strings.TrimSpace(branch) == "" { - branch = "main" + if err := mirror.Push(ctx, mirrorOptions(opts)); err != nil { + branch := opts.Branch + if strings.TrimSpace(branch) == "" { + branch = "main" + } + return fmt.Errorf("git push -u origin %s: %w", branch, err) } - out, err := output(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch) - if err == nil { - return nil - } - if !isNonFastForwardPush(out) { - return fmt.Errorf("git push -u origin %s: %w\n%s", branch, err, strings.TrimSpace(out)) - } - if pullErr := run(ctx, opts.RepoPath, "git", "pull", "--rebase", "--autostash", "origin", branch); pullErr != nil { - return fmt.Errorf("rebase before push retry: %w", pullErr) - } - return run(ctx, opts.RepoPath, "git", "push", "-u", "origin", branch) + return nil } func Export(ctx context.Context, s *store.Store, opts Options) (Manifest, error) { if err := EnsureRepo(ctx, opts); err != nil { return Manifest{}, err } - if err := os.RemoveAll(filepath.Join(opts.RepoPath, "tables")); err != nil { - return Manifest{}, fmt.Errorf("reset tables dir: %w", err) - } - if err := os.MkdirAll(filepath.Join(opts.RepoPath, "tables"), 0o755); err != nil { - return Manifest{}, fmt.Errorf("mkdir tables dir: %w", err) + base, err := snapshot.Export(ctx, snapshot.ExportOptions{ + DB: s.DB(), + RootDir: opts.RepoPath, + Tables: SnapshotTables, + MaxShardBytes: maxShardBytes, + Filter: func(table string, row map[string]any) (bool, error) { + return !isDirectMessageSnapshotRow(table, row), nil + }, + }) + if err != nil { + return Manifest{}, err } manifest := Manifest{ - Version: 1, - GeneratedAt: time.Now().UTC(), - Files: map[string]string{"manifest": ManifestName}, - } - for _, table := range SnapshotTables { - entry, err := exportTable(ctx, s.DB(), opts.RepoPath, table) - if err != nil { - return Manifest{}, err - } - manifest.Tables = append(manifest.Tables, entry) + Version: base.Version, + GeneratedAt: base.GeneratedAt, + Tables: base.Tables, + Files: base.Files, } if opts.IncludeEmbeddings { entry, err := exportEmbeddings(ctx, s.DB(), opts) @@ -243,53 +170,51 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error) _ = restorePragmas(ctx) } }() - tx, err := s.DB().BeginTx(ctx, nil) - if err != nil { + if _, err := snapshot.Import(ctx, snapshot.ImportOptions{ + DB: s.DB(), + RootDir: opts.RepoPath, + DeleteTables: SnapshotTables, + Progress: func(progress snapshot.ImportProgress) { + opts.reportProgress(ImportProgress{ + Phase: progress.Phase, + Table: progress.Table, + File: progress.File, + FileIndex: progress.FileIndex, + FileCount: progress.FileCount, + Rows: progress.Rows, + TotalRows: progress.TotalRows, + }) + }, + Filter: func(table string, row map[string]any) (bool, error) { + return !isDirectMessageSnapshotRow(table, row), nil + }, + BeforeImport: func(ctx context.Context, tx *sql.Tx) error { + for _, table := range []string{"message_fts", "member_fts"} { + if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil { + return fmt.Errorf("drop %s: %w", table, err) + } + } + return nil + }, + DeleteTable: func(ctx context.Context, tx *sql.Tx, table string) error { + query, args := snapshotDeleteQuery(table) + if _, err := tx.ExecContext(ctx, query, args...); err != nil { + return fmt.Errorf("clear %s: %w", table, err) + } + return nil + }, + AfterImport: func(ctx context.Context, tx *sql.Tx) error { + if err := repairImportedGuildIDs(ctx, tx); err != nil { + return err + } + if opts.IncludeEmbeddings { + return importEmbeddings(ctx, tx, opts, manifest.Embeddings) + } + return nil + }, + }); err != nil { return Manifest{}, err } - committed := false - defer func() { - if !committed { - _ = tx.Rollback() - } - }() - for _, table := range []string{"message_fts", "member_fts"} { - opts.reportProgress(ImportProgress{Phase: "drop_fts", Table: table}) - if _, err := tx.ExecContext(ctx, "drop table if exists "+table); err != nil { - return Manifest{}, fmt.Errorf("drop %s: %w", table, err) - } - } - for _, table := range slices.Backward(SnapshotTables) { - opts.reportProgress(ImportProgress{Phase: "clear", Table: table}) - query, args := snapshotDeleteQuery(table) - if _, err := tx.ExecContext(ctx, query, args...); err != nil { - return Manifest{}, fmt.Errorf("clear %s: %w", table, err) - } - } - for _, table := range manifest.Tables { - if err := ctx.Err(); err != nil { - return Manifest{}, err - } - opts.reportProgress(ImportProgress{Phase: "table_start", Table: table.Name, TotalRows: table.Rows}) - if err := importTable(ctx, tx, opts, table); err != nil { - return Manifest{}, err - } - opts.reportProgress(ImportProgress{Phase: "table_done", Table: table.Name, TotalRows: table.Rows}) - } - opts.reportProgress(ImportProgress{Phase: "repair"}) - if err := repairImportedGuildIDs(ctx, tx); err != nil { - return Manifest{}, err - } - if opts.IncludeEmbeddings { - if err := importEmbeddings(ctx, tx, opts, manifest.Embeddings); err != nil { - return Manifest{}, err - } - } - opts.reportProgress(ImportProgress{Phase: "commit"}) - if err := tx.Commit(); err != nil { - return Manifest{}, err - } - committed = true opts.reportProgress(ImportProgress{Phase: "rebuild_fts"}) if err := s.RebuildSearchIndexes(ctx); err != nil { return Manifest{}, err @@ -436,6 +361,10 @@ func ReadManifest(repoPath string) (Manifest, error) { return manifest, nil } +func mirrorOptions(opts Options) mirror.Options { + return mirror.Options{RepoPath: opts.RepoPath, Remote: opts.Remote, Branch: opts.Branch} +} + func NeedsImport(ctx context.Context, s *store.Store, staleAfter time.Duration) bool { if staleAfter <= 0 { staleAfter = 15 * time.Minute diff --git a/internal/share/share_test.go b/internal/share/share_test.go index 121cce0..d0995b2 100644 --- a/internal/share/share_test.go +++ b/internal/share/share_test.go @@ -184,6 +184,26 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) { require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "channels")), directMessageGuildID) require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "messages")), "private dm content") require.NotContains(t, snapshotTableText(t, repo, tableEntry(t, manifest, "sync_state")), "wiretap:last_import") + manifest = appendSnapshotRow(t, repo, manifest, "messages", map[string]any{ + "id": "hostile-dm", + "guild_id": directMessageGuildID, + "channel_id": "dm-c2", + "author_id": "u9", + "message_type": 0, + "created_at": "2026-04-24T16:00:00Z", + "content": "hostile imported dm", + "normalized_content": "hostile imported dm", + "pinned": 0, + "has_attachments": 0, + "raw_json": `{}`, + "updated_at": "2026-04-24T16:00:00Z", + }) + manifest = appendSnapshotRow(t, repo, manifest, "sync_state", map[string]any{ + "scope": "wiretap:hostile", + "cursor": "private", + "updated_at": "2026-04-24T16:00:00Z", + }) + writeShareManifest(t, repo, manifest) dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db")) require.NoError(t, err) @@ -202,6 +222,12 @@ func TestSnapshotExcludesAndPreservesDirectMessages(t *testing.T) { wiretapState, err := dst.GetSyncState(ctx, "wiretap:last_import") require.NoError(t, err) require.Equal(t, "2026-04-24T15:33:17Z", wiretapState) + hostileResults, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "hostile imported dm", Limit: 10}) + require.NoError(t, err) + require.Empty(t, hostileResults) + _, rows, err := dst.ReadOnlyQuery(ctx, "select count(*) from sync_state where scope = 'wiretap:hostile'") + require.NoError(t, err) + require.Equal(t, "0", rows[0][0]) } func TestExportImportEmbeddingsOptIn(t *testing.T) { @@ -800,6 +826,33 @@ func writeGzipJSONLines(t *testing.T, path string, lines []string) { require.NoError(t, file.Close()) } +func appendSnapshotRow(t *testing.T, repo string, manifest Manifest, tableName string, row map[string]any) Manifest { + t.Helper() + for i := range manifest.Tables { + if manifest.Tables[i].Name != tableName { + continue + } + rel := filepath.ToSlash(filepath.Join("tables", tableName, "hostile-"+strconv.Itoa(len(manifest.Tables[i].Files))+".jsonl.gz")) + full := filepath.Join(repo, filepath.FromSlash(rel)) + require.NoError(t, os.MkdirAll(filepath.Dir(full), 0o755)) + body, err := json.Marshal(row) + require.NoError(t, err) + writeGzipJSONLines(t, full, []string{string(body)}) + manifest.Tables[i].Files = append(manifest.Tables[i].Files, rel) + manifest.Tables[i].Rows++ + return manifest + } + t.Fatalf("table %s not found", tableName) + return manifest +} + +func writeShareManifest(t *testing.T, repo string, manifest Manifest) { + t.Helper() + body, err := json.MarshalIndent(manifest, "", " ") + require.NoError(t, err) + require.NoError(t, os.WriteFile(filepath.Join(repo, ManifestName), append(body, '\n'), 0o600)) +} + func snapshotTableText(t *testing.T, repo string, table TableManifest) string { t.Helper() return snapshotFilesText(t, repo, table.Files) diff --git a/internal/store/mentions_test.go b/internal/store/mentions_test.go index 47eafa8..2905528 100644 --- a/internal/store/mentions_test.go +++ b/internal/store/mentions_test.go @@ -92,6 +92,8 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) { require.NoError(t, err) require.Len(t, messages, 1) require.Contains(t, messages[0].Content, "stack trace") + require.Equal(t, "trace.txt", messages[0].AttachmentNames) + require.Contains(t, messages[0].AttachmentText, "stack trace line one") mentions, err := s.ListMentions(ctx, MentionListOptions{Target: "Shadow", Limit: 10}) require.NoError(t, err) @@ -116,3 +118,72 @@ func TestAttachmentTextAndMentionsAreQueryable(t *testing.T) { require.NoError(t, err) require.Len(t, filtered, 1) } + +func TestListMessagesResolvesMentionNamesForDisplay(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`})) + require.NoError(t, s.UpsertMember(ctx, MemberRecord{ + GuildID: "g1", + UserID: "u4", + Username: "fallback", + DisplayName: "Fallback User", + RoleIDsJSON: `[]`, + RawJSON: `{}`, + })) + + createdAt := time.Now().UTC().Format(time.RFC3339Nano) + rawContent := "ping <@u2> <@!u3> <@&r1> in <#c1>" + fallbackContent := "ask <@u4> in <#c1>" + require.NoError(t, s.UpsertMessages(ctx, []MessageMutation{ + { + Record: MessageRecord{ + ID: "m1", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "maintainers", + AuthorID: "u1", + AuthorName: "Peter", + MessageType: 0, + CreatedAt: createdAt, + Content: rawContent, + NormalizedContent: rawContent, + RawJSON: `{}`, + }, + Mentions: []MentionEventRecord{ + {MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u2", TargetName: "Shadow", EventAt: createdAt}, + {MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "user", TargetID: "u3", TargetName: "Vincent", EventAt: createdAt}, + {MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "role", TargetID: "r1", TargetName: "Maintainers", EventAt: createdAt}, + {MessageID: "m1", GuildID: "g1", ChannelID: "c1", AuthorID: "u1", TargetType: "channel", TargetID: "c1", TargetName: "maintainers", EventAt: createdAt}, + }, + }, + { + Record: MessageRecord{ + ID: "m2", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "maintainers", + AuthorID: "u1", + AuthorName: "Peter", + MessageType: 0, + CreatedAt: createdAt, + Content: fallbackContent, + NormalizedContent: fallbackContent, + RawJSON: `{}`, + }, + }, + })) + + messages, err := s.ListMessages(ctx, MessageListOptions{Channel: "maintainers", Limit: 10}) + require.NoError(t, err) + require.Len(t, messages, 2) + require.Equal(t, rawContent, messages[0].Content) + require.Equal(t, "ping @Shadow @Vincent @Maintainers in #maintainers", messages[0].DisplayContent) + require.Equal(t, fallbackContent, messages[1].Content) + require.Equal(t, "ask @Fallback User in #maintainers", messages[1].DisplayContent) +} diff --git a/internal/store/messages.go b/internal/store/messages.go index 7628b85..1ed10e5 100644 --- a/internal/store/messages.go +++ b/internal/store/messages.go @@ -2,10 +2,16 @@ package store import ( "context" + "regexp" "strings" "time" ) +var ( + discordUserMentionRE = regexp.MustCompile(`<@!?([A-Za-z0-9]+)>`) + discordChannelMentionRE = regexp.MustCompile(`<#([A-Za-z0-9]+)>`) +) + type MessageListOptions struct { GuildIDs []string Channel string @@ -29,17 +35,22 @@ type MentionListOptions struct { } type MessageRow struct { - MessageID string `json:"message_id"` - GuildID string `json:"guild_id"` - ChannelID string `json:"channel_id"` - ChannelName string `json:"channel_name"` - AuthorID string `json:"author_id"` - AuthorName string `json:"author_name"` - Content string `json:"content"` - CreatedAt time.Time `json:"created_at"` - ReplyToMessage string `json:"reply_to_message_id,omitempty"` - HasAttachments bool `json:"has_attachments"` - Pinned bool `json:"pinned"` + MessageID string `json:"message_id"` + GuildID string `json:"guild_id"` + GuildName string `json:"guild_name,omitempty"` + ChannelID string `json:"channel_id"` + ChannelName string `json:"channel_name"` + AuthorID string `json:"author_id"` + AuthorName string `json:"author_name"` + Content string `json:"content"` + DisplayContent string `json:"display_content,omitempty"` + CreatedAt time.Time `json:"created_at"` + ReplyToMessage string `json:"reply_to_message_id,omitempty"` + Source string `json:"source,omitempty"` + HasAttachments bool `json:"has_attachments"` + AttachmentNames string `json:"attachment_names,omitempty"` + AttachmentText string `json:"attachment_text,omitempty"` + Pinned bool `json:"pinned"` } func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) { @@ -75,6 +86,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me select m.id, m.guild_id, + coalesce(g.name, ''), m.channel_id, coalesce(c.name, ''), coalesce(m.author_id, ''), @@ -93,9 +105,13 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me end, m.created_at, coalesce(m.reply_to_message_id, ''), + coalesce(json_extract(m.raw_json, '$.source'), ''), m.has_attachments, + coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''), + coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''), m.pinned from messages m + left join guilds g on g.id = m.guild_id left join channels c on c.id = m.channel_id left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id where ` + strings.Join(clauses, " and ") + ` @@ -139,6 +155,7 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me if err := rows.Scan( &row.MessageID, &row.GuildID, + &row.GuildName, &row.ChannelID, &row.ChannelName, &row.AuthorID, @@ -146,7 +163,10 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me &row.Content, &created, &row.ReplyToMessage, + &row.Source, &hasAttachments, + &row.AttachmentNames, + &row.AttachmentText, &pinned, ); err != nil { return nil, err @@ -154,11 +174,364 @@ func (s *Store) ListMessages(ctx context.Context, opts MessageListOptions) ([]Me row.CreatedAt = parseTime(created) row.HasAttachments = hasAttachments == 1 row.Pinned = pinned == 1 + row.DisplayContent = row.Content out = append(out, row) } - return out, rows.Err() + if err := rows.Err(); err != nil { + return nil, err + } + return out, s.resolveMessageDisplayMentions(ctx, out) +} + +func (s *Store) ListMessagesWithThreadContext(ctx context.Context, opts MessageListOptions) ([]MessageRow, error) { + rows, err := s.ListMessages(ctx, opts) + if err != nil { + return nil, err + } + return s.hydrateMessageThreadContext(ctx, rows, opts.Limit+opts.Last) +} + +func (s *Store) hydrateMessageThreadContext(ctx context.Context, rows []MessageRow, limit int) ([]MessageRow, error) { + if len(rows) == 0 { + return rows, nil + } + rootIDs := make([]any, 0, len(rows)) + seenRoots := map[string]struct{}{} + visible := map[string]struct{}{} + for _, row := range rows { + if id := strings.TrimSpace(row.MessageID); id != "" { + visible[id] = struct{}{} + } + } + for _, row := range rows { + rootID := strings.TrimSpace(row.ReplyToMessage) + if rootID == "" { + continue + } + if _, ok := visible[rootID]; ok { + continue + } + if _, ok := seenRoots[rootID]; ok { + continue + } + seenRoots[rootID] = struct{}{} + rootIDs = append(rootIDs, rootID) + } + if len(rootIDs) == 0 { + return rows, nil + } + query := ` + select + m.id, + m.guild_id, + coalesce(g.name, ''), + m.channel_id, + coalesce(c.name, ''), + coalesce(m.author_id, ''), + coalesce( + nullif(mem.display_name, ''), + nullif(mem.nick, ''), + nullif(mem.global_name, ''), + nullif(mem.username, ''), + nullif(json_extract(m.raw_json, '$.author.global_name'), ''), + nullif(json_extract(m.raw_json, '$.author.username'), ''), + '' + ), + case + when trim(coalesce(m.content, '')) <> '' then m.content + else m.normalized_content + end, + m.created_at, + coalesce(m.reply_to_message_id, ''), + coalesce(json_extract(m.raw_json, '$.source'), ''), + m.has_attachments, + coalesce((select group_concat(a.filename, ', ') from message_attachments a where a.message_id = m.id), ''), + coalesce((select group_concat(a.text_content, char(10)) from message_attachments a where a.message_id = m.id and trim(a.text_content) <> ''), ''), + m.pinned + from messages m + left join guilds g on g.id = m.guild_id + left join channels c on c.id = m.channel_id + left join members mem on mem.guild_id = m.guild_id and mem.user_id = m.author_id + where m.id in (` + placeholders(len(rootIDs)) + `) + order by m.created_at asc, m.id asc` + contextRows, err := s.db.QueryContext(ctx, query, rootIDs...) + if err != nil { + return nil, err + } + defer func() { _ = contextRows.Close() }() + extra, err := scanMessageRows(contextRows) + if err != nil { + return nil, err + } + if err := s.resolveMessageDisplayMentions(ctx, extra); err != nil { + return nil, err + } + return mergeMessageRows(rows, extra), nil +} + +func scanMessageRows(rows rowScanner) ([]MessageRow, error) { + var out []MessageRow + for rows.Next() { + var row MessageRow + var created string + var hasAttachments int + var pinned int + if err := rows.Scan( + &row.MessageID, + &row.GuildID, + &row.GuildName, + &row.ChannelID, + &row.ChannelName, + &row.AuthorID, + &row.AuthorName, + &row.Content, + &created, + &row.ReplyToMessage, + &row.Source, + &hasAttachments, + &row.AttachmentNames, + &row.AttachmentText, + &pinned, + ); err != nil { + return nil, err + } + row.CreatedAt = parseTime(created) + row.HasAttachments = hasAttachments == 1 + row.Pinned = pinned == 1 + row.DisplayContent = row.Content + out = append(out, row) + } + if err := rows.Err(); err != nil { + return nil, err + } + return out, nil +} + +type rowScanner interface { + Next() bool + Scan(dest ...any) error + Err() error +} + +func mergeMessageRows(primary, extra []MessageRow) []MessageRow { + out := make([]MessageRow, 0, len(primary)+len(extra)) + seen := map[string]struct{}{} + appendRow := func(row MessageRow) { + key := row.GuildID + "\x00" + row.ChannelID + "\x00" + row.MessageID + if _, ok := seen[key]; ok { + return + } + seen[key] = struct{}{} + out = append(out, row) + } + for _, row := range primary { + appendRow(row) + } + for _, row := range extra { + appendRow(row) + } + return out } func normalizeChannelFilter(raw string) string { return strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(raw), "#")) } + +func (s *Store) resolveMessageDisplayMentions(ctx context.Context, rows []MessageRow) error { + if len(rows) == 0 { + return nil + } + ids := make([]any, 0, len(rows)) + indexByID := make(map[string]int, len(rows)) + for index, row := range rows { + id := strings.TrimSpace(row.MessageID) + if id == "" { + continue + } + ids = append(ids, id) + indexByID[id] = index + } + if len(ids) == 0 { + return nil + } + query := `select message_id, target_type, target_id, target_name from mention_events where message_id in (` + placeholders(len(ids)) + `)` + mentionRows, err := s.db.QueryContext(ctx, query, ids...) + if err != nil { + return err + } + defer func() { _ = mentionRows.Close() }() + for mentionRows.Next() { + var messageID, targetType, targetID, targetName string + if err := mentionRows.Scan(&messageID, &targetType, &targetID, &targetName); err != nil { + return err + } + index, ok := indexByID[messageID] + if !ok { + continue + } + rows[index].DisplayContent = replaceDiscordMention(rows[index].DisplayContent, targetType, targetID, targetName) + } + if err := mentionRows.Err(); err != nil { + return err + } + return s.resolveInlineDiscordMentions(ctx, rows) +} + +func replaceDiscordMention(content, targetType, targetID, targetName string) string { + targetID = strings.TrimSpace(targetID) + if targetID == "" { + return content + } + label := strings.TrimSpace(targetName) + if label == "" { + label = targetID + } + switch strings.TrimSpace(targetType) { + case "role": + return strings.ReplaceAll(content, "<@&"+targetID+">", "@"+label) + case "channel": + return strings.ReplaceAll(content, "<#"+targetID+">", "#"+label) + default: + content = strings.ReplaceAll(content, "<@"+targetID+">", "@"+label) + return strings.ReplaceAll(content, "<@!"+targetID+">", "@"+label) + } +} + +func (s *Store) resolveInlineDiscordMentions(ctx context.Context, rows []MessageRow) error { + userIDs := map[string]struct{}{} + channelIDs := map[string]struct{}{} + for _, row := range rows { + for _, match := range discordUserMentionRE.FindAllStringSubmatch(row.DisplayContent, -1) { + if len(match) > 1 && strings.TrimSpace(match[1]) != "" { + userIDs[match[1]] = struct{}{} + } + } + for _, match := range discordChannelMentionRE.FindAllStringSubmatch(row.DisplayContent, -1) { + if len(match) > 1 && strings.TrimSpace(match[1]) != "" { + channelIDs[match[1]] = struct{}{} + } + } + } + userNames, err := s.discordMemberDisplayNames(ctx, userIDs) + if err != nil { + return err + } + channelNames, err := s.discordChannelNames(ctx, channelIDs) + if err != nil { + return err + } + for index := range rows { + guildID := strings.TrimSpace(rows[index].GuildID) + rows[index].DisplayContent = discordUserMentionRE.ReplaceAllStringFunc(rows[index].DisplayContent, func(match string) string { + parts := discordUserMentionRE.FindStringSubmatch(match) + if len(parts) < 2 { + return match + } + if name := firstResolvedDiscordName(userNames, guildID, parts[1]); name != "" { + return "@" + name + } + return match + }) + rows[index].DisplayContent = discordChannelMentionRE.ReplaceAllStringFunc(rows[index].DisplayContent, func(match string) string { + parts := discordChannelMentionRE.FindStringSubmatch(match) + if len(parts) < 2 { + return match + } + if name := firstResolvedDiscordName(channelNames, guildID, parts[1]); name != "" { + return "#" + name + } + return match + }) + } + return nil +} + +func (s *Store) discordMemberDisplayNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) { + if len(ids) == 0 { + return nil, nil + } + args := mapKeysAsAny(ids) + query := ` + select guild_id, user_id, + coalesce( + nullif(display_name, ''), + nullif(nick, ''), + nullif(global_name, ''), + nullif(username, ''), + '' + ) + from members + where user_id in (` + placeholders(len(args)) + `) + ` + rows, err := s.db.QueryContext(ctx, query, args...) + if err != nil { + return nil, err + } + defer func() { _ = rows.Close() }() + out := map[string]string{} + for rows.Next() { + var guildID, userID, name string + if err := rows.Scan(&guildID, &userID, &name); err != nil { + return nil, err + } + rememberResolvedDiscordName(out, guildID, userID, name) + } + return out, rows.Err() +} + +func (s *Store) discordChannelNames(ctx context.Context, ids map[string]struct{}) (map[string]string, error) { + if len(ids) == 0 { + return nil, nil + } + args := mapKeysAsAny(ids) + query := `select guild_id, id, coalesce(nullif(name, ''), '') from channels where id in (` + placeholders(len(args)) + `)` + rows, err := s.db.QueryContext(ctx, query, args...) + if err != nil { + return nil, err + } + defer func() { _ = rows.Close() }() + out := map[string]string{} + for rows.Next() { + var guildID, channelID, name string + if err := rows.Scan(&guildID, &channelID, &name); err != nil { + return nil, err + } + rememberResolvedDiscordName(out, guildID, channelID, name) + } + return out, rows.Err() +} + +func mapKeysAsAny(values map[string]struct{}) []any { + out := make([]any, 0, len(values)) + for value := range values { + out = append(out, value) + } + return out +} + +func rememberResolvedDiscordName(out map[string]string, guildID, id, name string) { + id = strings.TrimSpace(id) + name = strings.TrimSpace(name) + if id == "" || name == "" { + return + } + if guildID = strings.TrimSpace(guildID); guildID != "" { + out[guildID+"|"+id] = name + } + if _, ok := out["|"+id]; !ok { + out["|"+id] = name + } +} + +func firstResolvedDiscordName(values map[string]string, guildID, id string) string { + id = strings.TrimSpace(id) + if id == "" { + return "" + } + if guildID = strings.TrimSpace(guildID); guildID != "" { + if value := strings.TrimSpace(values[guildID+"|"+id]); value != "" { + return value + } + } + return strings.TrimSpace(values["|"+id]) +} diff --git a/internal/store/store.go b/internal/store/store.go index 9d57b72..23d1e6e 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -5,13 +5,10 @@ import ( "database/sql" "fmt" "hash/fnv" - "os" - "path/filepath" - "runtime" "strconv" "time" - _ "modernc.org/sqlite" + crawlstore "github.com/vincentkoc/crawlkit/store" ) const ( @@ -114,66 +111,33 @@ type ChannelRow struct { } func Open(ctx context.Context, path string) (*Store, error) { - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return nil, fmt.Errorf("mkdir db dir: %w", err) - } - if err := ensureDBFile(path); err != nil { - return nil, err - } - dsn := fmt.Sprintf( - "file:%s?_pragma=foreign_keys(1)&_pragma=journal_mode(WAL)&_pragma=synchronous(NORMAL)&_pragma=temp_store(MEMORY)&_pragma=mmap_size(268435456)&_pragma=busy_timeout(5000)", - path, - ) - db, err := sql.Open("sqlite", dsn) + base, err := crawlstore.Open(ctx, crawlstore.Options{Path: path}) if err != nil { - return nil, fmt.Errorf("open sqlite: %w", err) - } - // SQLite is single-writer; keep one shared connection so concurrent callers queue - // instead of contending on separate writer connections. - db.SetMaxOpenConns(1) - db.SetMaxIdleConns(1) - if err := db.PingContext(ctx); err != nil { - _ = db.Close() - return nil, fmt.Errorf("ping sqlite: %w", err) - } - if err := tightenDBFilePerms(path); err != nil { - _ = db.Close() return nil, err } + db := base.DB() store := &Store{db: db, path: path} if err := store.migrate(ctx); err != nil { - _ = db.Close() + _ = base.Close() return nil, err } return store, nil } -func ensureDBFile(path string) error { - if _, err := os.Stat(path); err == nil { - return nil - } else if !os.IsNotExist(err) { - return fmt.Errorf("stat db file: %w", err) +func OpenReadOnly(ctx context.Context, path string) (*Store, error) { + base, err := crawlstore.OpenReadOnly(ctx, path) + if err != nil { + return nil, err } - file, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) - if err != nil && !os.IsExist(err) { - return fmt.Errorf("create db file: %w", err) + store := &Store{db: base.DB(), path: path} + if version, err := store.schemaVersion(ctx); err != nil { + _ = base.Close() + return nil, err + } else if version != storeSchemaVersion { + _ = base.Close() + return nil, fmt.Errorf("database schema version mismatch: got %d want %d", version, storeSchemaVersion) } - if file != nil { - if closeErr := file.Close(); closeErr != nil { - return fmt.Errorf("close db file: %w", closeErr) - } - } - return nil -} - -func tightenDBFilePerms(path string) error { - if runtime.GOOS == "windows" { - return nil - } - if err := os.Chmod(path, 0o600); err != nil { - return fmt.Errorf("chmod db file: %w", err) - } - return nil + return store, nil } func (s *Store) Close() error { diff --git a/internal/store/store_test.go b/internal/store/store_test.go index 6e9e4b4..2d1e183 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -149,8 +149,9 @@ func TestStoreMaintenanceHelpers(t *testing.T) { require.NoError(t, err) defer func() { _ = s.Close() }() - require.NoError(t, ensureDBFile(dbPath)) - require.NoError(t, tightenDBFilePerms(dbPath)) + info, err := os.Stat(dbPath) + require.NoError(t, err) + require.Equal(t, os.FileMode(0o600), info.Mode().Perm()) require.NoError(t, s.RebuildSearchIndexes(ctx)) version, err := s.schemaVersion(ctx) require.NoError(t, err) @@ -1519,6 +1520,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) { require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "maintainers", RawJSON: `{}`})) require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c2", GuildID: "g1", Kind: "text", Name: "random", RawJSON: `{}`})) + require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`})) require.NoError(t, s.UpsertMember(ctx, MemberRecord{ GuildID: "g1", UserID: "u1", @@ -1625,6 +1627,7 @@ func TestListMessagesFiltersAndLimit(t *testing.T) { require.Len(t, rows, 1) require.Equal(t, "m4", rows[0].MessageID) require.Equal(t, "fallback-user", rows[0].AuthorName) + require.Equal(t, "Guild", rows[0].GuildName) require.True(t, rows[0].Pinned) require.True(t, rows[0].HasAttachments) @@ -1666,6 +1669,49 @@ func TestListMessagesFiltersAndLimit(t *testing.T) { require.Equal(t, "m4", rows[1].MessageID) } +func TestListMessagesWithThreadContextHydratesReplyRoot(t *testing.T) { + t.Parallel() + + ctx := context.Background() + s, err := Open(ctx, filepath.Join(t.TempDir(), "discrawl.db")) + require.NoError(t, err) + defer func() { _ = s.Close() }() + + require.NoError(t, s.UpsertGuild(ctx, GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`})) + require.NoError(t, s.UpsertChannel(ctx, ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`})) + require.NoError(t, s.UpsertMessage(ctx, MessageRecord{ + ID: "root", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u1", + MessageType: 0, + CreatedAt: "2026-03-01T10:00:00Z", + Content: "root message", + NormalizedContent: "root message", + RawJSON: `{}`, + })) + require.NoError(t, s.UpsertMessage(ctx, MessageRecord{ + ID: "reply", + GuildID: "g1", + ChannelID: "c1", + ChannelName: "general", + AuthorID: "u2", + MessageType: 0, + CreatedAt: "2026-03-02T10:00:00Z", + Content: "reply message", + NormalizedContent: "reply message", + ReplyToMessageID: "root", + RawJSON: `{}`, + })) + + rows, err := s.ListMessagesWithThreadContext(ctx, MessageListOptions{Last: 1}) + require.NoError(t, err) + require.Len(t, rows, 2) + require.Equal(t, "reply", rows[0].MessageID) + require.Equal(t, "root", rows[1].MessageID) +} + func TestNormalizeFTSQueryEdgeCases(t *testing.T) { t.Parallel() diff --git a/internal/syncer/message_sync.go b/internal/syncer/message_sync.go index 8ac86c8..158ef4f 100644 --- a/internal/syncer/message_sync.go +++ b/internal/syncer/message_sync.go @@ -7,6 +7,8 @@ import ( "time" "github.com/bwmarrin/discordgo" + "github.com/vincentkoc/crawlkit/progress" + "github.com/openclaw/discrawl/internal/store" ) @@ -663,6 +665,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou totalChannels := p.totalChannels messages := p.messages elapsed := now.Sub(p.startedAt).Round(time.Second).String() + percent := progress.Percent(int64(processed), int64(totalChannels)) + completion := progress.Completion(int64(processed), int64(totalChannels)) p.mu.Unlock() p.syncer.logger.Info( "message sync progress", @@ -670,6 +674,8 @@ func (p *messageSyncProgress) complete(channel *discordgo.Channel, count int, ou "processed_channels", processed, "total_channels", totalChannels, "remaining_channels", totalChannels-processed, + "percent", percent, + "completion", completion, "active_channels", activeChannels, "messages_written", messages, "deferred_channels", deferred, @@ -698,6 +704,8 @@ func (p *messageSyncProgress) finish(err error) { totalChannels := p.totalChannels messages := p.messages elapsed := now.Sub(p.startedAt).Round(time.Second).String() + percent := progress.Percent(int64(processed), int64(totalChannels)) + completion := progress.Completion(int64(processed), int64(totalChannels)) oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now) p.mu.Unlock() attrs := []any{ @@ -705,6 +713,8 @@ func (p *messageSyncProgress) finish(err error) { "processed_channels", processed, "total_channels", totalChannels, "remaining_channels", totalChannels - processed, + "percent", percent, + "completion", completion, "active_channels", activeChannels, "messages_written", messages, "deferred_channels", deferred, @@ -766,6 +776,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() { messages := p.messages idleFor := now.Sub(p.lastProgressAt).Round(time.Second).String() elapsed := now.Sub(p.startedAt).Round(time.Second).String() + percent := progress.Percent(int64(processed), int64(totalChannels)) + completion := progress.Completion(int64(processed), int64(totalChannels)) oldestID, oldestName, oldestElapsed, oldestIdle, oldestPages, oldestPageMessages := oldestInflightDetails(p.inflight, now) p.mu.Unlock() p.syncer.logger.Info( @@ -774,6 +786,8 @@ func (p *messageSyncProgress) logWaitHeartbeat() { "processed_channels", processed, "total_channels", totalChannels, "remaining_channels", totalChannels-processed, + "percent", percent, + "completion", completion, "active_channels", activeChannels, "messages_written", messages, "deferred_channels", deferred, diff --git a/internal/syncer/message_sync_progress_test.go b/internal/syncer/message_sync_progress_test.go index e266a9b..436a572 100644 --- a/internal/syncer/message_sync_progress_test.go +++ b/internal/syncer/message_sync_progress_test.go @@ -73,6 +73,8 @@ func TestMessageSyncProgressFinishReportsSummaryCounts(t *testing.T) { logs := out.String() require.Contains(t, logs, `msg="message sync finished"`) require.Contains(t, logs, `processed_channels=3`) + require.Contains(t, logs, `percent=100.0`) + require.Contains(t, logs, `completion=100.0%`) require.Contains(t, logs, `messages_written=42`) require.Contains(t, logs, `skipped_missing_access_channels=1`) require.Contains(t, logs, `skipped_unknown_channel_channels=1`) @@ -105,4 +107,6 @@ func TestMessageSyncProgressReportsWaitingHeartbeat(t *testing.T) { require.Contains(t, logs, `oldest_active_channel_id=c1`) require.Contains(t, logs, `oldest_active_channel_name=slowpoke`) require.Contains(t, logs, `active_channels=1`) + require.Contains(t, logs, `percent=0.0`) + require.Contains(t, logs, `completion=0.0%`) }