Compare commits

...

143 Commits

Author SHA1 Message Date
Peter Steinberger
a1def2c98f
fix: accept trailing search flags
Some checks failed
ci / lint (push) Has been cancelled
ci / test (push) Has been cancelled
ci / deps (push) Has been cancelled
ci / release-check (push) Has been cancelled
ci / secrets (push) Has been cancelled
CodeQL / analyze (push) Has been cancelled
Pages / Deploy docs (push) Has been cancelled
Security Gate: Secret Scanning / Scan for Verified Secrets (push) Has been cancelled
2026-05-08 15:13:00 +01:00
Peter Steinberger
be98cde23f
chore: release v0.7.0 2026-05-08 15:00:32 +01:00
Peter Steinberger
b52eefaa40
chore: satisfy Go 1.26 lint checks 2026-05-08 10:04:39 +01:00
Peter Steinberger
733714a5e7
build: bump Go toolchain to 1.26.3 2026-05-08 10:01:08 +01:00
Peter Steinberger
40c787c54a
refactor: consume crawlkit embedding primitives 2026-05-08 09:58:34 +01:00
Peter Steinberger
40317aa538
fix: keep read commands available during tail 2026-05-08 09:49:13 +01:00
Peter Steinberger
fb969672e0
test: cover cli and archive helper edges 2026-05-08 08:37:27 +01:00
Peter Steinberger
67c6f4655b
fix(share): delta import git snapshots 2026-05-08 08:29:38 +01:00
Peter Steinberger
335a95bd66
ci: update homebrew tap on release
Some checks failed
Security Gate: Secret Scanning / Scan for Verified Secrets (push) Has been cancelled
ci / lint (push) Has been cancelled
ci / test (push) Has been cancelled
ci / deps (push) Has been cancelled
ci / release-check (push) Has been cancelled
ci / secrets (push) Has been cancelled
CodeQL / analyze (push) Has been cancelled
Pages / Deploy docs (push) Has been cancelled
2026-05-07 03:56:51 +01:00
Vincent Koc
d8c8778f19
build(deps): bump crawlkit to v0.4.1 (#58) 2026-05-06 14:52:59 -07:00
Vincent Koc
eeb10dcd30
Merge pull request #57 from openclaw/ci-security-baseline
chore(ci): add crawl security baseline
2026-05-06 01:55:22 -07:00
Vincent Koc
016e849e3c
docs: document SQL archive queries 2026-05-06 01:54:34 -07:00
Vincent Koc
89d35a67a4
docs: refresh discrawl agent skill 2026-05-06 01:47:05 -07:00
Vincent Koc
0da02de393
chore(security): add verified secret scanning 2026-05-06 01:37:04 -07:00
Vincent Koc
14dd5478f4
chore: add Go repository hygiene files 2026-05-06 01:37:03 -07:00
Vincent Koc
4b4303556a
chore(ci): add stale issue automation 2026-05-06 00:30:20 -07:00
Vincent Koc
abcb77e6fc
chore(ci): add CodeQL analysis 2026-05-06 00:30:19 -07:00
Vincent Koc
f328cfba2f
chore(security): add protected automation owners 2026-05-06 00:30:17 -07:00
Vincent Koc
f32dae98cc
docs: document crawlkit archive surfaces 2026-05-05 19:16:52 -07:00
Vincent Koc
98be6a9c11
fix(ci): restore crawlkit merge checks 2026-05-05 18:48:48 -07:00
Vincent Koc
ebb41dabfd
merge: use crawlkit infrastructure
* feat/use-crawlkit: (50 commits)
  fix(release): update version ldflag module path
  chore(deps): use crawlkit v0.4.0
  fix(tui): hydrate discord roots without thread scans
  fix(tui): limit discord thread hydration
  fix(tui): hydrate discord reply context
  fix(share): forward snapshot import progress
  fix(tui): browse newest discord messages
  fix(tui): show discord attachment details
  feat(tui): refresh discord archive rows
  fix(tui): resolve discord inline mentions
  fix(tui): render discord mention names
  docs: note shared tui polish
  fix(tui): document shared controls
  fix(tui): expose discord message details
  fix(tui): add Discord message URLs
  docs(tui): note dm pane labels
  fix(tui): label discord direct message panes
  fix(tui): use compact-pane crawlkit
  fix(tui): pick up shared detail renderer
  fix(sync): include progress percentages
  ...
2026-05-05 18:20:49 -07:00
Vincent Koc
8751f10779
fix(release): update version ldflag module path 2026-05-05 15:02:21 -07:00
Vincent Koc
5daa12f12c
chore(deps): use crawlkit v0.4.0 2026-05-05 15:00:24 -07:00
Vincent Koc
af78c7124f
fix(tui): hydrate discord roots without thread scans 2026-05-05 15:00:24 -07:00
Vincent Koc
acc470311a
fix(tui): limit discord thread hydration 2026-05-05 15:00:24 -07:00
Vincent Koc
18d4aba76a
fix(tui): hydrate discord reply context 2026-05-05 15:00:24 -07:00
Vincent Koc
c8118d9dcc
fix(share): forward snapshot import progress 2026-05-05 15:00:24 -07:00
Vincent Koc
c758a33753
fix(tui): browse newest discord messages 2026-05-05 15:00:24 -07:00
Vincent Koc
9e822ad7d7
fix(tui): show discord attachment details 2026-05-05 15:00:23 -07:00
Vincent Koc
01b1053809
feat(tui): refresh discord archive rows 2026-05-05 15:00:23 -07:00
Vincent Koc
058eb0699e
fix(tui): resolve discord inline mentions 2026-05-05 15:00:23 -07:00
Vincent Koc
a4f5d3fdb4
fix(tui): render discord mention names 2026-05-05 15:00:23 -07:00
Vincent Koc
39906edc3d
docs: note shared tui polish 2026-05-05 15:00:23 -07:00
Vincent Koc
6d31b368fc
fix(tui): document shared controls 2026-05-05 15:00:23 -07:00
Vincent Koc
e67f3e059b
fix(tui): expose discord message details 2026-05-05 15:00:23 -07:00
Vincent Koc
c6d969f998
fix(tui): add Discord message URLs 2026-05-05 15:00:22 -07:00
Vincent Koc
082d384792
docs(tui): note dm pane labels 2026-05-05 15:00:22 -07:00
Vincent Koc
7e3df6e1aa
fix(tui): label discord direct message panes 2026-05-05 15:00:22 -07:00
Vincent Koc
7a510faad5
fix(tui): use compact-pane crawlkit 2026-05-05 15:00:21 -07:00
Vincent Koc
39996a085e
fix(tui): pick up shared detail renderer 2026-05-05 15:00:21 -07:00
Vincent Koc
2b69518169
fix(sync): include progress percentages 2026-05-05 15:00:21 -07:00
Vincent Koc
87da9945b6
fix(tui): apply default guild scope 2026-05-05 14:59:15 -07:00
Vincent Koc
4725873906
fix(tui): label discord archive rows 2026-05-05 14:59:14 -07:00
Vincent Koc
6feb197446
chore(deps): bump crawlkit to v0.3.13 2026-05-05 14:59:14 -07:00
Vincent Koc
e918bf494a
fix(share): filter local-only snapshot imports 2026-05-05 14:59:14 -07:00
Vincent Koc
87fdb1f49c
chore(deps): update crawlkit to v0.3.11 2026-05-05 14:59:14 -07:00
Vincent Koc
dff96610cc
fix(cli): show tui help without opening the archive 2026-05-05 14:59:14 -07:00
Vincent Koc
f3f2496e08
chore(deps): update crawlkit to v0.3.10 2026-05-05 14:59:14 -07:00
Vincent Koc
3fa4af5e2c
chore(deps): update crawlkit to v0.3.9 2026-05-05 14:59:14 -07:00
Vincent Koc
ac5fb8233e
chore(deps): update crawlkit to v0.3.8 2026-05-05 14:59:14 -07:00
Vincent Koc
13f08f5955
docs(changelog): note TUI pane polish 2026-05-05 14:59:13 -07:00
Vincent Koc
300c2f1cfe
chore(deps): update crawlkit to v0.3.7 2026-05-05 14:59:13 -07:00
Vincent Koc
36c9a173e4
fix(tui): enrich Discord archive rows 2026-05-05 14:59:13 -07:00
Vincent Koc
43411bacf2
feat(tui): use shared pane browser 2026-05-05 14:59:13 -07:00
Vincent Koc
9cdb40181a
chore(deps): update crawlkit to v0.3.5 2026-05-05 14:59:13 -07:00
Vincent Koc
0eca15aaf6
fix(tui): use crawlkit empty-json fix 2026-05-05 14:59:13 -07:00
Vincent Koc
8fc6f1f789
fix(tui): use crawlkit safe renderer 2026-05-05 14:59:12 -07:00
Vincent Koc
ab39e1bde2
ci: smoke crawlkit control surface 2026-05-05 14:59:12 -07:00
Vincent Koc
c4be70e521
feat(cli): add crawlkit control surface 2026-05-05 14:59:11 -07:00
Vincent Koc
5e5c401531
feat(tui): use universal archive rows 2026-05-05 14:57:10 -07:00
Vincent Koc
f7db36c7fd
chore: tidy crawlkit module sums 2026-05-05 14:57:10 -07:00
Vincent Koc
7fcb7bb599
refactor: use crawlkit package nouns 2026-05-05 14:57:10 -07:00
Vincent Koc
a13447fd47
chore: use crawlkit v0.2.0 2026-05-05 14:56:30 -07:00
Vincent Koc
88d43dd77b
docs(tui): document discord browser workflow 2026-05-05 14:56:29 -07:00
Vincent Koc
f0752b7e2f
feat(tui): add discord archive browser 2026-05-05 14:56:29 -07:00
Vincent Koc
65672636de
chore: use crawlkit v0.1.1 2026-05-05 14:56:29 -07:00
Vincent Koc
ddf769d09b
chore: use crawlkit v0.1.0 2026-05-05 14:56:29 -07:00
Vincent Koc
5406ae59b9
refactor(share): use crawlkit archive helpers 2026-05-05 14:56:29 -07:00
Vincent Koc
6202e6bf55
refactor(store): use crawlkit sqlite opener 2026-05-05 14:55:48 -07:00
Vincent Koc
4e9b9bee86
refactor(config): use crawlkit runtime paths 2026-05-05 14:55:48 -07:00
Vincent Koc
4ee6379494
chore: add crawlkit module dependency 2026-05-05 14:55:48 -07:00
Peter Steinberger
8999ff5fd3
chore: start 0.7.0 development
Some checks failed
ci / lint (push) Has been cancelled
ci / test (push) Has been cancelled
ci / deps (push) Has been cancelled
ci / release-check (push) Has been cancelled
ci / secrets (push) Has been cancelled
2026-05-05 10:27:11 +01:00
Peter Steinberger
e351d0ecdc
docs: polish discrawl contact links 2026-05-05 10:20:15 +01:00
Peter Steinberger
3b76ba7973
chore: move module to openclaw/discrawl 2026-05-05 10:07:56 +01:00
Peter Steinberger
f3aaf284f2
docs: add discrawl social card 2026-05-05 03:58:21 +01:00
Peter Steinberger
70817284fa
docs: make discrawl.sh canonical 2026-05-05 03:49:43 +01:00
Peter Steinberger
4f15765afb
docs: add generated docs site 2026-05-05 03:34:49 +01:00
Peter Steinberger
8c64819033
docs: keep contact email plain 2026-05-05 02:23:39 +01:00
Peter Steinberger
3566b74d89
docs: add discrawl contact page 2026-05-05 02:05:09 +01:00
Peter Steinberger
d1f4d378f7
release: v0.6.6 2026-05-05 01:47:34 +01:00
Peter Steinberger
6ea543b4c6
test: cover wiretap cache checkpoint helpers 2026-05-05 01:43:13 +01:00
Peter Steinberger
78fcca8204
fix: stabilize wiretap cache checkpointing 2026-05-05 01:22:48 +01:00
Peter Steinberger
68b49c90a5
ci: skip warm discord backup imports 2026-05-03 19:41:40 +01:00
Peter Steinberger
86502b251c
chore: bump discrawl to 0.6.5 2026-05-03 15:36:02 +01:00
Peter Steinberger
dc72ac200e
docs: add 0.6.5 unreleased changelog 2026-05-03 15:31:51 +01:00
Peter Steinberger
c934c579b0
fix: keep discrawl sync update explicit 2026-05-03 15:03:41 +01:00
Peter Steinberger
45f0133b62
docs: update 0.6.4 changelog 2026-05-03 12:15:51 +01:00
Peter Steinberger
a406662a8d
test: isolate sync fixture desktop path 2026-05-03 12:12:38 +01:00
Peter Steinberger
2d6de140d9
test: relax signal cancellation race expectation 2026-05-03 12:10:35 +01:00
Peter Steinberger
2e155ba97c
chore: prepare 0.6.4 release 2026-05-03 12:04:35 +01:00
Peter Steinberger
c5b3f7ad52
fix: treat cancellation as normal exit 2026-05-03 12:03:57 +01:00
Peter Steinberger
25b1eb878d
fix: handle discrawl termination safely 2026-05-03 11:58:44 +01:00
Peter Steinberger
0b12c3c653
ci: refresh lint tool pins 2026-05-03 11:57:03 +01:00
dependabot[bot]
33dc2ef228
build(deps): bump github.com/pelletier/go-toml/v2 from 2.3.0 to 2.3.1 (#54)
Bumps [github.com/pelletier/go-toml/v2](https://github.com/pelletier/go-toml) from 2.3.0 to 2.3.1.
- [Release notes](https://github.com/pelletier/go-toml/releases)
- [Commits](https://github.com/pelletier/go-toml/compare/v2.3.0...v2.3.1)

---
updated-dependencies:
- dependency-name: github.com/pelletier/go-toml/v2
  dependency-version: 2.3.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-03 11:51:50 +01:00
dependabot[bot]
8b8d8a857e
build(deps): bump goreleaser/goreleaser-action from 7.1.0 to 7.2.1 (#53)
Bumps [goreleaser/goreleaser-action](https://github.com/goreleaser/goreleaser-action) from 7.1.0 to 7.2.1.
- [Release notes](https://github.com/goreleaser/goreleaser-action/releases)
- [Commits](https://github.com/goreleaser/goreleaser-action/compare/v7.1.0...v7.2.1)

---
updated-dependencies:
- dependency-name: goreleaser/goreleaser-action
  dependency-version: 7.2.1
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2026-05-03 11:51:47 +01:00
Peter Steinberger
99229b7db6
chore: start 0.6.4 development 2026-05-02 08:20:10 +01:00
Peter Steinberger
4e34c550e0
release: v0.6.3 2026-05-01 13:06:16 +01:00
Peter Steinberger
41389d548f
docs: update unreleased changelog 2026-05-01 13:04:49 +01:00
Peter Steinberger
c47732ca10
chore(ci): tighten go linting 2026-05-01 13:01:21 +01:00
Peter Steinberger
b7346ada62
fix(ci): satisfy testifylint 2026-05-01 12:52:45 +01:00
Peter Steinberger
624b771894
fix(config): add keyring fallback for bot token 2026-05-01 11:27:52 +01:00
Peter Steinberger
5971c9861c
test(store): cover fts query normalization
Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
2026-05-01 11:12:15 +01:00
Vincent Koc
f10cb25272
release: v0.6.2 2026-05-01 01:59:47 -07:00
Vincent Koc
381c69339e
fix(ci): satisfy intrange lint 2026-05-01 01:51:29 -07:00
Vincent Koc
7ba158b6b4
fix(wiretap): cover current Discord cache layout (#52) 2026-05-01 01:44:07 -07:00
Peter Steinberger
e4e246989b
feat(analytics): finish trends and digest replies
Co-authored-by: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
2026-05-01 09:32:42 +01:00
Matt Van Horn
71b602c5f6
feat(analytics): add quiet channel report
Adds analytics quiet for reporting silent message-bearing channels.
2026-05-01 01:16:18 -07:00
Matt Van Horn
468ad1680c
feat(cli): digest command for windowed per-channel summary
Adds a windowed per-channel digest command over existing archive tables.
2026-05-01 00:45:40 -07:00
Peter Steinberger
41b675d5f4
fix: classify selected Discord DMs from desktop cache 2026-05-01 08:32:08 +01:00
Peter Steinberger
0487ccc1e0
fix: harden discrawl archive imports 2026-05-01 07:23:58 +01:00
Peter Steinberger
ca817db4f5
chore: prepare 0.6.2 unreleased 2026-04-29 16:10:25 +01:00
Peter Steinberger
b387ed2d6f
fix(share): harden git snapshot imports (#51)
* fix(share): harden git snapshot imports

* docs: update changelog for share import hardening
2026-04-29 15:19:44 +01:00
Peter Steinberger
59f42cb0ab
fix(cli): serialize discrawl sync runs 2026-04-29 14:43:53 +01:00
Peter Steinberger
28676d38f3
release: v0.6.1 2026-04-28 02:24:02 +01:00
Peter Steinberger
a2ed8c6e8b
perf: skip unchanged wiretap cache files 2026-04-28 02:04:33 +01:00
Peter Steinberger
5a0ca81644
refactor: smooth sync mode handling 2026-04-27 21:24:28 +01:00
Peter Steinberger
a98c6a6193
ci: skip backup workflows without repository 2026-04-27 21:12:57 +01:00
Peter Steinberger
f39e448792
fix: honor skip-members in full sync resume 2026-04-27 21:09:48 +01:00
Peter Steinberger
7af036009e
refactor: remove OpenClaw-specific config coupling 2026-04-27 20:39:35 +01:00
Peter Steinberger
6808268342
refactor: isolate OpenClaw SecretRef resolution 2026-04-27 14:45:17 +01:00
R. Teodoro
4cc6bb05b9
fix: support OpenClaw SecretRef tokens (#49)
OpenClaw Discord token loading now accepts SecretRef objects backed by file or env providers while preserving provider source and allowlist semantics.

Co-authored-by: TeodoroRodrigo <rodrigoteodoro.90@gmail.com>
2026-04-27 14:36:52 +01:00
Peter Steinberger
53a2d80334
build: update go dependencies 2026-04-27 10:31:39 +01:00
Peter Steinberger
30bd3a5d18
docs: add discrawl skill 2026-04-27 02:11:44 +01:00
Peter Steinberger
37a595bd7d
fix: remove unreleased twitter import 2026-04-27 00:27:08 +01:00
Peter Steinberger
4f6d70830d
fix: preserve twitter archives during share import 2026-04-26 01:57:05 +01:00
Peter Steinberger
c8986cbcf9
feat: import twitter archives 2026-04-26 01:24:21 +01:00
Peter Steinberger
7f3ed7a8e7
fix: allow missing desktop cache during wiretap 2026-04-25 04:15:43 +01:00
Peter Steinberger
214ca972dc
chore: update dependencies for 0.6.1 2026-04-25 04:07:22 +01:00
Peter Steinberger
ee30702fad
release: v0.6.0 2026-04-24 20:59:20 +01:00
Peter Steinberger
e0de7a51c6
ci: strengthen test and release checks 2026-04-24 20:48:49 +01:00
Peter Steinberger
ad609bcc6b
chore: refresh go tooling and dependencies 2026-04-24 20:36:03 +01:00
Peter Steinberger
d83d1c2f6f
test: raise coverage floor to 85 percent 2026-04-24 20:24:57 +01:00
Peter Steinberger
1e66b7d698
test: increase DM and wiretap coverage 2026-04-24 19:23:20 +01:00
Peter Steinberger
3b00967219
chore: start 0.5.2 development 2026-04-24 18:51:51 +01:00
Peter Steinberger
e22a3709ca
fix(wiretap): infer direct message channel names 2026-04-24 18:43:38 +01:00
Peter Steinberger
8a9859f50b
feat(cli): add fast DM query shortcuts 2026-04-24 17:58:46 +01:00
Peter Steinberger
69f7671ed5
release: v0.5.1 2026-04-24 17:22:12 +01:00
Peter Steinberger
b86490acad
fix(share): keep wiretap DMs out of snapshots 2026-04-24 17:22:01 +01:00
Vincent Koc
8848c2e612
docs: clarify sync sources and wiretap 2026-04-24 08:50:20 -07:00
Peter Steinberger
e2db10e514
release: v0.5.0 2026-04-24 16:35:12 +01:00
Peter Steinberger
a2b4f72f4a
fix(discord): retain wiretap channel metadata 2026-04-24 16:35:08 +01:00
Solomon Neas
ea097000d1
test(syncer): cover ThreadsArchived 403 tolerance
Strengthen archived thread 403 coverage by tracking ThreadsArchived calls separately from active thread calls and asserting no unavailable marker is written for skipped archived crawls.
2026-04-24 00:49:51 -07:00
Vincent Koc
0a247939e8
feat(discord): add desktop wiretap import (#43)
* feat(discord): add desktop wiretap import

* fix(discord): satisfy wiretap lint
2026-04-24 00:44:23 -07:00
143 changed files with 13962 additions and 2811 deletions

View File

@ -0,0 +1,124 @@
---
name: discrawl
description: Use for local Discord archive search, sync freshness, DMs, channel summaries, desktop/API/git-share sources, TUI browsing, and Discrawl repo/release work.
---
# Discrawl
Use local Discord archive data first for Discord questions. Hit Discord APIs
only when the archive is stale, missing the requested scope, or the user asks
for current external context.
## Sources
- DB: `~/.discrawl/discrawl.db`
- Config: `~/.discrawl/config.toml`
- Cache: `~/.discrawl/cache`
- Logs: `~/.discrawl/logs`
- Git share repo: `~/.discrawl/share`
- Repo: `openclaw/discrawl`; use `~/GIT/_Perso/discrawl` only after verifying that
its remote targets `openclaw/discrawl`; otherwise, use a fresh checkout
- Preferred CLI: `discrawl`; fallback to `go run ./cmd/discrawl` from the repo if the installed binary is stale
## Freshness
For recent/current questions, check freshness before analysis:
```bash
discrawl status --json
```
For precise freshness from the default database:
```bash
sqlite3 ~/.discrawl/discrawl.db \
"select coalesce(max(updated_at),'') from sync_state where scope like 'channel:%';"
```
Routine diagnostics:
```bash
discrawl doctor
```
Desktop-local refresh:
```bash
discrawl sync --source wiretap
```
Bot API latest refresh, when credentials are available:
```bash
discrawl sync
```
Use `--full` only for deliberate historical backfills:
```bash
discrawl sync --full
```
If SQLite reports busy/locked, check for stray `discrawl` processes before retrying.
## Query Workflow
1. Resolve scope: guild, channel, DM, author, keyword, date range.
2. Check freshness for recent/current requests.
3. Prefer CLI search/messages for slices; use read-only SQL for exact counts.
4. Report absolute date spans, counts, channel/DM names, and known gaps.
Common commands:
```bash
discrawl search "query"
discrawl messages --channel '#maintainers' --days 7 --all
discrawl dms --last 20
discrawl tui --dm
discrawl sql "select count(*) from messages;"
```
## SQL
Use `discrawl sql` for exact counts, joins, and ranking queries when normal
CLI reads are too coarse. The command is read-only by default, accepts SQL as
args or stdin, and supports `--json` for agent parsing.
Useful examples:
```bash
discrawl --json sql "select count(*) as messages from messages;"
discrawl --json sql "select coalesce(nullif(c.name, ''), m.channel_id) as channel, count(*) as messages from messages m left join channels c on c.id = m.channel_id group by m.channel_id order by messages desc limit 20;"
discrawl --json sql "select coalesce(nullif(mm.display_name, ''), nullif(mm.global_name, ''), nullif(mm.username, ''), m.author_id) as author, count(*) as messages from messages m left join members mm on mm.guild_id = m.guild_id and mm.user_id = m.author_id group by m.guild_id, m.author_id order by messages desc limit 20;"
```
Never use `--unsafe --confirm` unless the user explicitly asks for a database
mutation and the write has been reviewed.
When the installed CLI lacks a new feature, build or run from a verified
`openclaw/discrawl` checkout before concluding the feature is missing.
## Discord Boundaries
Bot API sync requires configured Discord bot credentials; do not invent token
availability. Desktop wiretap mode reads local Discord Desktop artifacts and
must not extract credentials, use user tokens, call Discord as the user, or
write to Discord application storage. Wiretap/Desktop cache DMs are local-only
and must not be described as part of the published Git snapshot. Git-share
snapshots must not include secrets or `@me` DM rows.
## Verification
For repo edits, prefer existing Go gates:
```bash
GOWORK=off go test ./...
```
Then run a targeted CLI smoke test for the touched surface, for example:
```bash
discrawl doctor
discrawl status --json
discrawl search "test" --limit 5
```

12
.editorconfig Normal file
View File

@ -0,0 +1,12 @@
root = true
[*]
charset = utf-8
end_of_line = lf
insert_final_newline = true
indent_style = tab
indent_size = 4
[*.{md,yml,yaml,json,toml}]
indent_style = space
indent_size = 2

6
.gitattributes vendored Normal file
View File

@ -0,0 +1,6 @@
* text=auto
*.go text eol=lf
*.md text eol=lf
*.toml text eol=lf
*.yml text eol=lf
*.yaml text eol=lf

12
.github/CODEOWNERS vendored Normal file
View File

@ -0,0 +1,12 @@
# Protect ownership and automation rules.
/.github/CODEOWNERS @openclaw/openclaw-secops
/.github/dependabot.yml @openclaw/openclaw-secops
/.github/workflows/ @openclaw/openclaw-secops
# Release, backup, and package integrity surfaces.
/.goreleaser.yaml @openclaw/openclaw-secops
/go.mod @openclaw/openclaw-secops
/go.sum @openclaw/openclaw-secops
/scripts/*backup* @openclaw/openclaw-secops
/scripts/*release* @openclaw/openclaw-secops
/scripts/*publish* @openclaw/openclaw-secops

View File

@ -30,13 +30,13 @@ jobs:
- name: Lint
uses: golangci/golangci-lint-action@v9.2.0
with:
version: v2.11.1
version: v2.12.1
- name: Install analyzers
run: |
go install honnef.co/go/tools/cmd/staticcheck@2025.1.1
go install mvdan.cc/gofumpt@v0.7.0
go install github.com/securego/gosec/v2/cmd/gosec@v2.22.9
go install honnef.co/go/tools/cmd/staticcheck@v0.7.0
go install mvdan.cc/gofumpt@v0.9.2
go install github.com/securego/gosec/v2/cmd/gosec@v2.26.1
- name: Vet
run: go vet ./...
@ -70,10 +70,10 @@ jobs:
cache: true
- name: Test with coverage
run: go test ./... -coverprofile=coverage.out
run: go test -count=1 ./... -coverprofile=coverage.out
- name: Test with race detector
run: go test -race ./...
run: go test -count=1 -race ./...
- name: Enforce coverage floor
run: |
@ -83,15 +83,27 @@ jobs:
print "missing coverage total"
exit 1
}
if (total + 0 < 80.0) {
printf("coverage %.1f%% is below 80%%\n", total + 0)
if (total + 0 < 85.0) {
printf("coverage %.1f%% is below 85%%\n", total + 0)
exit 1
}
printf("coverage %.1f%%\n", total + 0)
}'
- name: Build
run: go build ./cmd/discrawl
run: go build -o bin/discrawl ./cmd/discrawl
- name: Smoke test CLI control surface
run: |
set -euo pipefail
output="$(./bin/discrawl help)"
printf '%s\n' "$output"
printf '%s' "$output" | grep -q "metadata"
printf '%s' "$output" | grep -q "tui"
test -n "$(./bin/discrawl --version)"
./bin/discrawl metadata --json | grep -q '"schema_version"'
./bin/discrawl status --json | grep -q '"databases"'
./bin/discrawl tui --json | grep -q '^\['
deps:
runs-on: ubuntu-latest
@ -109,12 +121,39 @@ jobs:
- name: Verify module cache
run: go mod verify
- name: Check go.mod tidy
run: |
go mod tidy
git diff --exit-code -- go.mod go.sum
- name: Install govulncheck
run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4
run: go install golang.org/x/vuln/cmd/govulncheck@v1.3.0
- name: Run govulncheck
run: '"$(go env GOPATH)/bin/govulncheck" ./...'
release-check:
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
with:
fetch-depth: 0
- name: Setup Go
uses: actions/setup-go@v6.4.0
with:
go-version-file: go.mod
cache: true
- name: Snapshot release build
uses: goreleaser/goreleaser-action@v7.2.1
with:
distribution: goreleaser
version: "~> v2"
args: release --snapshot --clean --skip=publish
secrets:
runs-on: ubuntu-latest
timeout-minutes: 15
@ -131,7 +170,7 @@ jobs:
cache: true
- name: Install gitleaks
run: go install github.com/zricethezav/gitleaks/v8@v8.30.0
run: go install github.com/zricethezav/gitleaks/v8@v8.30.1
- name: Scan git history
run: |

37
.github/workflows/codeql.yml vendored Normal file
View File

@ -0,0 +1,37 @@
name: CodeQL
on:
pull_request:
push:
branches:
- main
schedule:
- cron: "29 4 * * 1"
workflow_dispatch:
permissions:
actions: read
contents: read
security-events: write
jobs:
analyze:
name: analyze
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Go
uses: actions/setup-go@v6
with:
go-version-file: go.mod
cache: true
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: go
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4

View File

@ -26,11 +26,6 @@ jobs:
go-version-file: go.mod
cache: true
- name: Setup Node
uses: actions/setup-node@v6.0.0
with:
node-version: "24"
- name: Restore Discord DB cache
id: restore-discord-db
uses: actions/cache/restore@v5.0.5
@ -51,46 +46,27 @@ jobs:
- name: Generate daily Discord report
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
DISCORD_BACKUP_TOKEN: ${{ secrets.DISCORD_BACKUP_TOKEN }}
GH_TOKEN: ${{ secrets.DISCORD_FIELD_NOTES_GITHUB_TOKEN || github.token }}
DISCRAWL_BACKUP_REPOSITORY: ${{ secrets.DISCRAWL_BACKUP_REPOSITORY }}
CONFIG: ${{ runner.temp }}/discrawl/config.toml
DB: ${{ github.workspace }}/.discrawl-ci/discrawl.db
BACKUP_REPO: ${{ runner.temp }}/discord-backup
OPENCLAW_STATE_DIR: ${{ runner.temp }}/openclaw
DISCORD_FIELD_NOTES_GITHUB_REPO: openclaw/openclaw
run: |
if [ -z "${DISCRAWL_BACKUP_REPOSITORY:-}" ]; then
echo "::notice title=Backup report skipped::Configure DISCRAWL_BACKUP_REPOSITORY as owner/repo to enable archive reports."
exit 0
fi
if [ -z "${DISCORD_BACKUP_TOKEN:-}" ]; then
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to openclaw/discord-backup."
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to the backup repository."
exit 1
fi
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/openclaw/discord-backup.git"
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/${DISCRAWL_BACKUP_REPOSITORY}.git"
mkdir -p "$(dirname "$CONFIG")"
mkdir -p "$(dirname "$DB")"
git clone "$BACKUP_REMOTE" "$BACKUP_REPO"
printf 'db_path = "%s"\n' "$DB" > "$CONFIG"
go run ./cmd/discrawl --config "$CONFIG" subscribe --repo "$BACKUP_REPO" "$BACKUP_REMOTE"
go run ./cmd/discrawl --config "$CONFIG" report --readme "$BACKUP_REPO/README.md"
if [ -n "${OPENAI_API_KEY:-}" ]; then
npm install -g openclaw@latest
openclaw onboard \
--non-interactive \
--mode local \
--auth-choice openai-api-key \
--secret-input-mode ref \
--accept-risk \
--skip-daemon \
--skip-skills \
--skip-search \
--skip-health
tmp_config="$(mktemp)"
jq '.agents.defaults.model = "openai/gpt-5.2" | .agents.defaults.timeoutSeconds = 300 | .agents.defaults.llm.idleTimeoutSeconds = 240' \
"$OPENCLAW_STATE_DIR/openclaw.json" > "$tmp_config"
mv "$tmp_config" "$OPENCLAW_STATE_DIR/openclaw.json"
scripts/discord-backup-field-notes.sh "$CONFIG" "$BACKUP_REPO"
else
echo "OPENAI_API_KEY not configured; skipping OpenClaw field notes"
fi
if git -C "$BACKUP_REPO" diff --quiet README.md; then
echo "README already up to date"
exit 0

52
.github/workflows/pages.yml vendored Normal file
View File

@ -0,0 +1,52 @@
name: Pages
on:
push:
branches:
- main
paths:
- "docs/**"
- "scripts/build-docs-site.mjs"
- ".github/workflows/pages.yml"
workflow_dispatch:
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: pages
cancel-in-progress: false
jobs:
deploy:
name: Deploy docs
runs-on: ubuntu-latest
timeout-minutes: 10
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Check out
uses: actions/checkout@v6
- name: Set up Node
uses: actions/setup-node@v6
with:
node-version: 24
- name: Build site
run: node scripts/build-docs-site.mjs
- name: Configure Pages
uses: actions/configure-pages@v6
- name: Upload artifact
uses: actions/upload-pages-artifact@v5
with:
path: dist/docs-site
- name: Deploy
id: deployment
uses: actions/deploy-pages@v5

View File

@ -48,30 +48,40 @@ jobs:
env:
DISCORD_BOT_TOKEN: ${{ secrets.DISCORD_BOT_TOKEN }}
DISCORD_BACKUP_TOKEN: ${{ secrets.DISCORD_BACKUP_TOKEN }}
DISCRAWL_BACKUP_REPOSITORY: ${{ secrets.DISCRAWL_BACKUP_REPOSITORY }}
DISCRAWL_GUILD_ID: ${{ secrets.DISCRAWL_GUILD_ID }}
CONFIG: ${{ runner.temp }}/discrawl/config.toml
DB: ${{ github.workspace }}/.discrawl-ci/discrawl.db
BACKUP_REPO: ${{ runner.temp }}/discord-backup
run: |
if [ -z "${DISCRAWL_BACKUP_REPOSITORY:-}" ]; then
echo "::notice title=Backup publish skipped::Configure DISCRAWL_BACKUP_REPOSITORY as owner/repo to enable archive publishing."
exit 0
fi
if [ -z "${DISCORD_BOT_TOKEN:-}" ]; then
echo "::error title=Missing secret::Configure DISCORD_BOT_TOKEN in the discrawl repo secrets."
exit 1
fi
if [ -z "${DISCORD_BACKUP_TOKEN:-}" ]; then
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to openclaw/discord-backup."
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to the backup repository."
exit 1
fi
if [ -z "${DISCRAWL_GUILD_ID:-}" ]; then
echo "::error title=Missing secret::Configure DISCRAWL_GUILD_ID with the Discord guild to publish."
exit 1
fi
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/openclaw/discord-backup.git"
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/${DISCRAWL_BACKUP_REPOSITORY}.git"
mkdir -p "$(dirname "$CONFIG")"
mkdir -p "$(dirname "$DB")"
git clone "$BACKUP_REMOTE" "$BACKUP_REPO"
go run ./cmd/discrawl --config "$CONFIG" init --db "$DB" --guild "$DISCRAWL_GUILD_ID"
if [ -f "$BACKUP_REPO/manifest.json" ]; then
go run ./cmd/discrawl --config "$CONFIG" update --repo "$BACKUP_REPO" --remote "$BACKUP_REMOTE"
if [ -s "$DB" ]; then
echo "Restored Discord DB cache at $DB; skipping pre-sync snapshot import."
else
echo "Discord DB cache missing; importing latest published snapshot before latest-only sync."
go run ./cmd/discrawl --config "$CONFIG" update --repo "$BACKUP_REPO" --remote "$BACKUP_REMOTE"
fi
fi
go run ./cmd/discrawl --config "$CONFIG" sync --guild "$DISCRAWL_GUILD_ID" --skip-members --latest-only
git -C "$BACKUP_REPO" pull --ff-only origin main

View File

@ -37,10 +37,69 @@ jobs:
run: git checkout ${{ inputs.tag }}
- name: GoReleaser
uses: goreleaser/goreleaser-action@v7
uses: goreleaser/goreleaser-action@v7.2.1
with:
distribution: goreleaser
version: "~> v2"
args: release --clean --config /tmp/.goreleaser.yaml
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
update-homebrew-tap:
runs-on: ubuntu-latest
needs: goreleaser
steps:
- name: Resolve release tag
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
echo "RELEASE_TAG=${{ inputs.tag }}" >> "$GITHUB_ENV"
else
echo "RELEASE_TAG=${{ github.ref_name }}" >> "$GITHUB_ENV"
fi
- name: Dispatch tap formula update
env:
GH_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
run: |
if [ -z "$GH_TOKEN" ]; then
echo "::error::Set HOMEBREW_TAP_TOKEN with workflow access to steipete/homebrew-tap"
exit 1
fi
request_id="discrawl-${RELEASE_TAG}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
expected_title="Update discrawl for ${RELEASE_TAG} (${request_id})"
gh workflow run update-formula.yml \
--repo steipete/homebrew-tap \
--ref main \
-f formula=discrawl \
-f tag="$RELEASE_TAG" \
-f repository=openclaw/discrawl \
-f artifact_template="{formula}_{version}_{target}.tar.gz" \
-f request_id="$request_id"
run_id=""
for _ in {1..30}; do
run_id=$(gh run list \
--repo steipete/homebrew-tap \
--workflow update-formula.yml \
--branch main \
--event workflow_dispatch \
--limit 20 \
--json databaseId,displayTitle \
--jq ".[] | select(.displayTitle == \"$expected_title\") | .databaseId" | head -n1)
if [ -n "$run_id" ]; then
break
fi
sleep 5
done
if [ -z "$run_id" ]; then
echo "::error::Could not find tap workflow run with title: $expected_title"
exit 1
fi
gh run watch "$run_id" \
--repo steipete/homebrew-tap \
--exit-status \
--interval 10

63
.github/workflows/secret-scan.yml vendored Normal file
View File

@ -0,0 +1,63 @@
name: "Security Gate: Secret Scanning"
on:
push:
branches: ["**"]
pull_request:
branches: [main, master]
permissions: {}
jobs:
trufflehog:
name: Scan for Verified Secrets
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Resolve scan range
id: scan_range
env:
EVENT_NAME: ${{ github.event_name }}
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PUSH_BASE_SHA: ${{ github.event.before }}
PUSH_HEAD_SHA: ${{ github.sha }}
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
run: |
set -euo pipefail
zero_sha="0000000000000000000000000000000000000000"
if [[ "$EVENT_NAME" == "pull_request" ]]; then
base="$PR_BASE_SHA"
head="$PR_HEAD_SHA"
else
base="$PUSH_BASE_SHA"
head="$PUSH_HEAD_SHA"
if [[ -z "$base" || "$base" == "$zero_sha" ]]; then
base="origin/$DEFAULT_BRANCH"
fi
fi
echo "base=$base" >> "$GITHUB_OUTPUT"
echo "head=$head" >> "$GITHUB_OUTPUT"
- name: TruffleHog OSS
id: trufflehog
uses: trufflesecurity/trufflehog@v3.95.2
with:
path: ./
base: ${{ steps.scan_range.outputs.base }}
head: ${{ steps.scan_range.outputs.head }}
extra_args: --only-verified --debug
- name: Notify on failure
if: steps.trufflehog.outcome == 'failure'
run: |
echo "::error::Verified secrets found. Rotate the credential before merging."
exit 1

86
.github/workflows/stale.yml vendored Normal file
View File

@ -0,0 +1,86 @@
name: Stale
on:
schedule:
- cron: "25 4 * * *"
workflow_dispatch:
permissions: {}
jobs:
stale:
permissions:
issues: write
pull-requests: write
runs-on: ubuntu-latest
steps:
- name: Mark stale unassigned issues and pull requests
uses: actions/stale@v10
with:
days-before-issue-stale: 14
days-before-issue-close: 7
days-before-pr-stale: 14
days-before-pr-close: 7
stale-issue-label: stale
stale-pr-label: stale
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
exempt-pr-labels: maintainer,no-stale
operations-per-run: 1000
ascending: true
exempt-all-assignees: true
remove-stale-when-updated: true
stale-issue-message: |
This issue has been automatically marked as stale due to inactivity.
Please add updated discrawl details or it will be closed.
stale-pr-message: |
This pull request has been automatically marked as stale due to inactivity.
Please update it or it will be closed.
close-issue-message: |
Closing due to inactivity.
If this still affects discrawl, open a new issue with current reproduction details.
close-issue-reason: not_planned
close-pr-message: |
Closing due to inactivity.
If this PR should be revived, reopen it with current context and validation.
- name: Mark stale assigned issues
uses: actions/stale@v10
with:
days-before-issue-stale: 30
days-before-issue-close: 10
days-before-pr-stale: -1
days-before-pr-close: -1
stale-issue-label: stale
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
operations-per-run: 1000
ascending: true
include-only-assigned: true
remove-stale-when-updated: true
stale-issue-message: |
This assigned issue has been automatically marked as stale after 30 days of inactivity.
Please add an update or it will be closed.
close-issue-message: |
Closing due to inactivity.
If this still affects discrawl, reopen or file a new issue with current evidence.
close-issue-reason: not_planned
- name: Mark stale assigned pull requests
uses: actions/stale@v10
with:
days-before-issue-stale: -1
days-before-issue-close: -1
days-before-pr-stale: 27
days-before-pr-close: 7
stale-pr-label: stale
exempt-pr-labels: maintainer,no-stale
operations-per-run: 1000
ascending: true
include-only-assigned: true
ignore-pr-updates: true
remove-stale-when-updated: true
stale-pr-message: |
This assigned pull request has been automatically marked as stale after being open for 27 days.
Please add an update or it will be closed.
close-pr-message: |
Closing due to inactivity.
If this PR should be revived, reopen it with current context and validation.

View File

@ -2,15 +2,33 @@ version: "2"
linters:
enable:
- asasalint
- bidichk
- bodyclose
- canonicalheader
- copyloopvar
- dupword
- durationcheck
- errcheck
- errchkjson
- errorlint
- exptostd
- gocheckcompilerdirectives
- gocritic
- gomoddirectives
- govet
- intrange
- ineffassign
- makezero
- misspell
- modernize
- nilerr
- nilnesserr
- noctx
- nolintlint
- nosprintfhostport
- perfsprint
- predeclared
- rowserrcheck
- sloglint
- sqlclosecheck
@ -18,6 +36,8 @@ linters:
- testifylint
- unconvert
- unused
- usetesting
- usestdlibvars
- wastedassign
formatters:

View File

@ -12,7 +12,7 @@ builds:
env:
- CGO_ENABLED=0
ldflags:
- -s -w -X github.com/steipete/discrawl/internal/cli.version={{ .Version }}
- -s -w -X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}
targets:
- darwin_amd64
- darwin_arm64

View File

@ -1,11 +1,138 @@
# Changelog
All notable changes to `discrawl` will be documented in this file.
## Unreleased
## 0.7.0 - 2026-05-08
### Changes
- Added `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared crawlkit pane browser.
- Added crawlkit-backed `metadata --json`, `status --json`, and `doctor --json` control surfaces for launchers, automation, and CI checks.
- Published the generated documentation site at `discrawl.sh`, including command pages, install/setup docs, configuration, security notes, guides, a contact page, and social cards.
- Moved the Go module and release metadata to `github.com/openclaw/discrawl`.
### Fixes
- Kept documented command-local search flags working after the query, such as `discrawl search "term" --limit 5`. Thanks @PrinceOfEgypt.
- Made the terminal browser more useful and accurate: default guild scoping, newest-message startup, compact panes, selected-message detail panes, count-header sorting, local/remote status labels, right-click actions, Discord message URLs, row labels, direct-message pane labels, mention rendering, inline mention resolution, attachment details, and reply-context hydration without broad thread scans.
- Kept read-only commands such as `search`, `messages`, and safe `sql` usable while `tail` or another writer holds the sync lock. Thanks @PrinceOfEgypt.
- Kept `tui --help`, status, and terminal-browser reads safe for fresh or missing local databases without triggering Git snapshot auto-update.
- Kept local-only snapshot rows filtered during shared archive imports and forwarded snapshot import progress through the crawlkit import path.
- Made stale Git snapshot imports plan shard deltas from crawlkit file fingerprints or Git object identity, so routine shared-archive refreshes import changed message tail shards instead of rebuilding every table and FTS index.
- Included progress percentages in message-sync logs.
- Fixed GoReleaser version stamping after the module path move.
### Documentation
- Documented the crawlkit-backed config/status/control, snapshot, mirror, sync-state, output, and shared TUI surfaces now used on `main`.
- Clarified that Discord bot sync, desktop wiretap parsing, DM privacy filters, schema ownership, FTS/ranking, embeddings, and analytics remain app-owned.
- Aligned terminal-browser docs with the gitcrawl-style shared TUI model: channel/person/thread groups, message rows, detail/thread panes, sorting, mouse selection, right-click actions, and local/remote status chrome.
- Refreshed the repo-local `discrawl` agent skill for local Discord archive, freshness, query, boundary, TUI, verification, and read-only SQL workflows.
### Maintenance
- Migrated runtime paths, SQLite opening, archive mirror/export/import helpers, output/status wiring, and TUI plumbing onto the shared `crawlkit` infrastructure.
- Moved reusable embedding providers and vector helpers onto `crawlkit` while keeping Discrawl-owned storage, FTS, queueing, and privacy filters local.
- Updated crawlkit through `v0.4.1`, switched imports to `github.com/openclaw/crawlkit`, and added CI smoke coverage for the crawlkit control surface and merge behavior.
- Added CodeQL, verified secret scanning, protected automation owners, stale issue automation, `.editorconfig`, and `.gitattributes`.
- Added release workflow automation that dispatches the Homebrew tap formula update after GoReleaser publishes a tag.
## 0.6.6 - 2026-05-05
### Fixes
- `wiretap` now uses a fast default path for Discord Chromium cache imports: it scans cheap context files plus route-bearing HTTP cache entries, checkpoints file progress in batches, and leaves exhaustive historical cache archaeology behind `--full-cache` / `desktop.full_cache`.
## 0.6.5 - 2026-05-03
### Fixes
- Scheduled Discord backup publishing now skips redundant pre-sync snapshot imports when the workflow DB cache is warm, keeping fresh Git snapshots from getting delayed by a full archive reimport.
- `discrawl sync` now keeps Git snapshot refreshes explicit by default; use `--update=auto` or `--update=force` when you want a sync run to pull/import the shared snapshot before live Discord or desktop-cache deltas.
- Snapshot imports now emit phase/table/file progress and keep the sync lock file updated with the active phase, making long update/import runs diagnosable instead of looking hung.
- Recent-message scans are backed by a plain `messages(created_at, id)` index so archive freshness and short-window analysis queries avoid full-table scans.
## 0.6.4 - 2026-05-03
### Fixes
- `discrawl` now handles SIGINT/SIGTERM by canceling active sync/import contexts so large SQLite and FTS writes can roll back and close cleanly instead of being terminated mid-transaction.
### Maintenance
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
## 0.6.3 - 2026-05-01
### Fixes
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.
### Maintenance
- Tightened Go linting with additional golangci-lint checks for compiler directives, host/port formatting, predeclared identifiers, missing command contexts, and related code-quality regressions.
- Updated test subprocess helpers to use test-scoped contexts and cleaned up assertions so the stricter CI suite stays green.
## 0.6.2 - 2026-05-01
### Changes
- Added `discrawl digest` for per-channel activity summaries with messages, replies, active authors, top posters, and top mentions. Thanks @mvanhorn.
- Added `discrawl analytics quiet` and `discrawl analytics trends` for finding silent top-level channels and week-over-week channel volume. Thanks @mvanhorn.
### Fixes
- `discrawl digest` now reports reply counts as `replies` instead of mislabeling reply roots as Discord threads.
- `discrawl sync` now serializes concurrent runs with a local lock, preventing two refreshes from writing the archive at the same time.
- Git snapshot imports now keep SQLite crash recovery enabled and share the same archive lock as sync, update, tail, wiretap, embed, and auto-update reads so interrupted imports are less likely to corrupt the live database.
- Git snapshot imports now recover from corrupt local FTS tables by dropping and rebuilding search indexes, and repair missing guild IDs from channel metadata so shared archive reports stay fresh.
- Channel-history sync now falls back to the channel guild when Discord omits `message.guild_id`, keeping messages, attachments, mentions, and FTS rows correctly scoped.
## 0.6.1 - 2026-04-28
### Fixes
- Repeated `sync --source wiretap` runs now skip unchanged Discord Desktop cache files and report unchanged file counts, making steady-state local-cache refreshes much faster.
- `sync --full --skip-members` now also skips member crawls when resuming incomplete stored channels, so backfills do not unexpectedly refresh the full guild member list.
### Maintenance
- Refactored sync-mode handling so routine latest syncs, `--all-channels`, `--full`, and member-refresh decisions share clearer internal paths with regression coverage.
- Refreshed Go module dependencies and CI tool/action pins, including staticcheck, gofumpt, gosec, govulncheck, gitleaks, setup-node, and GoReleaser.
- Hardened report README writes and Discord Desktop cache reads with root-scoped filesystem access to satisfy the latest gosec checks.
## 0.6.0 - 2026-04-24
### Changes
- `dms` now lists local wiretap DM conversations and can read or search one DM thread with `--with`, `--last`, and `--search`, so common DM queries no longer require raw SQL.
- `search --dm` and `messages --dm` now target the local-only `@me` archive directly and skip Git snapshot auto-update, since DMs are never imported from the shared mirror.
- Go module dependencies and lint rules were refreshed for the current Go toolchain, including stricter JSON marshal checks and modern simplification rules.
### Fixes
- Wiretap now infers fallback DM channel names from cached Discord user/profile data, so channels discovered only from route/message cache entries resolve to names like `Vincent K` instead of `channel-*`.
- Wiretap message output now preserves sanitized author labels in stored metadata, improving `dms` and `messages` output without storing raw desktop cache payloads.
### Tests
- Added regression coverage for DM channel-name inference from cached profile data when Discord Desktop cache lacks explicit channel recipient metadata.
- Added coverage for local DM conversation listing/filtering, DM cleanup paths, share import/export helpers, CLI DM windows, and Discord Desktop import helper edge cases.
- CI now runs uncached test and race suites, checks `go mod tidy`, and performs a snapshot GoReleaser build before release tags.
## 0.5.1 - 2026-04-24
### Fixes
- Git snapshot export/import now keeps wiretap DMs strictly local: `@me` rows, wiretap sync state, and DM vectors are excluded from published snapshots while existing local DM rows are preserved on import.
- Publishing without `--with-embeddings` now omits old embedding manifests instead of carrying forward a stale vector bundle.
## 0.5.0 - 2026-04-24
### Changes
- `sync --source both|discord|wiretap` controls bot-token sync versus local Discord Desktop cache import; the default is `both`.
- `wiretap` imports classifiable cached Discord Desktop message payloads into the local archive, including proven DMs under synthetic guild id `@me`, without using user tokens.
- `sync` now defaults to the fast latest-message refresh path for untargeted runs; use `--all-channels` for the broad stored-channel repair sweep or `--full` for historical backfill.
## 0.4.1 - 2026-04-22
@ -45,11 +172,10 @@ All notable changes to `discrawl` will be documented in this file.
- `sync --all` now bypasses `default_guild_id` so one run can fan out across every discovered guild without clearing the single-guild default first
- `sync --full` no longer aborts when forum thread discovery hits Discord `403 Missing Access`; inaccessible channels are skipped and marked unavailable while accessible channels continue syncing
- startup now validates and stamps SQLite schema version via `PRAGMA user_version`, and fails fast if the local DB schema is newer than the running binary
- `init --from-openclaw` now supports `--account`, and OpenClaw token fields can use `${ENV_VAR}` placeholders
- git-backed archive sharing can now export/import compressed JSONL snapshots with manifests, subscribe to a Git repo as the data source, and run in git-only mode without Discord credentials
- `messages`, `search`, and reports can automatically refresh stale git-backed data, preferring the Git snapshot before falling back to live Discord when both sources are configured
- the Discord backup publisher workflow now syncs latest messages, publishes the archive to a private GitHub repo, serializes concurrent runs, validates required secrets, and skips the member crawl for faster updates
- the backup report workflow now updates README activity stats, supports OpenClaw-generated field notes, runs the field-note logic from the backup action, and keeps those queries bounded with process timeouts
- the backup report workflow now updates README activity stats from the backup action and keeps those queries bounded with process timeouts
- `sync --latest-only` adds a lightweight refresh path for checking recent Discord messages without doing a full historical crawl
- repository imports now skip expensive rebuilds when the snapshot manifest is already current, and GitHub Actions persist the warmed SQLite database across runs
- the Docker git-source smoke test now verifies that a fresh install can subscribe to a repository-only archive and query messages, SQL, and reports
@ -73,12 +199,12 @@ All notable changes to `discrawl` will be documented in this file.
- multi-guild Discord crawler with single-guild default UX
- local SQLite archive with FTS5 search
- commands: `init`, `sync`, `tail`, `search`, `messages`, `mentions`, `sql`, `members`, `channels`, `status`, `doctor`
- OpenClaw config reuse plus env-based bot token discovery
- env-based bot token discovery
- resumable full-history sync, live gateway tailing, repair sync loop, targeted channel sync
- attachment-text indexing for small text-like uploads
- structured user and role mention indexing/querying
- empty-message filtering based on real searchable/displayable content instead of raw body only
- CI with lint, tests, secret scanning, and `80%+` coverage enforcement
- CI with lint, tests, secret scanning, and coverage enforcement
- release plumbing via GoReleaser, GitHub Actions, and Homebrew tap packaging
- sync correctness fixes for empty channels, inaccessible channels, unknown channels, and large-channel resume behavior
- SQLite/FTS performance fixes for backfill throughput and lower write amplification

259
README.md
View File

@ -1,8 +1,15 @@
# discrawl 🛰️ — Mirror Discord into SQLite; search server history locally
`discrawl` mirrors Discord guild data into local SQLite so you can search, inspect, and query server history without depending on Discord search. Teams can also publish that archive as a private Git snapshot repo, so readers get fresh org memory without Discord bot credentials.
`discrawl` mirrors Discord guild data into local SQLite so you can search, inspect, and query server history without depending on Discord search. It can also import classifiable Discord Desktop cache messages for local DM recovery/search without using a user token. Teams can publish the guild archive as a private Git snapshot repo, so readers get fresh org memory without Discord bot credentials.
Live sync uses real bot tokens. No user-token hacks. Data stays local unless you explicitly publish a Git-backed snapshot.
There are two local archive sources:
- Discord bot API sync for guilds, channels, members, threads, and message history the configured bot can access
- Discord Desktop cache import for local, classifiable cached messages, including proven local-only DMs under `@me`
Desktop wiretap mode reads local cache artifacts only. It does not extract credentials, use user tokens, call the Discord API as your user, or run a selfbot.
Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror.
## What It Does
@ -13,7 +20,11 @@ Live sync uses real bot tokens. No user-token hacks. Data stays local unless you
- extracts small text-like attachments into the local search index
- records structured user and role mentions for direct querying
- tails Gateway events for live updates, with periodic repair syncs
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
- publishes and imports private Git-backed archive snapshots for org-wide read access
- browses stored messages and local DMs in a terminal archive UI
- exposes `metadata --json`, `status --json`, and `doctor --json` for local
launchers, automation, and CI
- supports Git-only read mode with no Discord credentials on reader machines
- generates backup README activity reports, with optional AI-written field notes
- exposes read-only SQL for ad hoc analysis
@ -24,7 +35,8 @@ Search defaults to all guilds. `sync` and `tail` default to the configured defau
## Requirements
- Go `1.26+`
- for publishing/syncing: a Discord bot token the bot can use to read the target guilds
- for publishing/syncing guilds: a Discord bot token the bot can use to read the target guilds
- for DM wiretap import: local Discord Desktop cache files on the same machine
- for read-only Git-backed access: access to a private snapshot repo, no Discord credentials required
- bot permissions for the channels you want archived when running `sync` or `tail`
@ -50,12 +62,12 @@ Without those intents/permissions, `sync`, `tail`, member snapshots, or message
Token resolution:
1. OpenClaw config, if `discord.token_source` is not `env`
2. `DISCORD_BOT_TOKEN` or the configured `discord.token_env`
1. `DISCORD_BOT_TOKEN` or the configured `discord.token_env`
2. OS keyring item `discrawl` / `discord_bot_token`, or the configured keyring service/account
`discrawl` accepts either raw token text or a value prefixed with `Bot `. It normalizes that automatically.
Fastest env-only path:
Fastest path:
```bash
export DISCORD_BOT_TOKEN="your-bot-token"
@ -71,7 +83,20 @@ export DISCORD_BOT_TOKEN="your-bot-token"
Then reload your shell before running `discrawl`.
If you already use OpenClaw, `discrawl` can reuse the Discord token from `~/.openclaw/openclaw.json` by default.
If you prefer the OS keyring, keep the token out of config and store it in the default keyring item:
```bash
# macOS Keychain
security add-generic-password -U -s discrawl -a discord_bot_token -w "$DISCORD_BOT_TOKEN"
# Linux Secret Service / libsecret
printf %s "$DISCORD_BOT_TOKEN" | secret-tool store --label="discrawl Discord bot token" service discrawl username discord_bot_token
# Windows Credential Manager
cmdkey /generic:discrawl:discord_bot_token /user:discord_bot_token /pass:%DISCORD_BOT_TOKEN%
```
Set `discord.token_source = "keyring"` if you want to require keyring lookup instead of env-first fallback.
Default runtime paths:
@ -92,7 +117,7 @@ discrawl --version
Build from source:
```bash
git clone https://github.com/steipete/discrawl.git
git clone https://github.com/openclaw/discrawl.git
cd discrawl
go build -o bin/discrawl ./cmd/discrawl
./bin/discrawl --version
@ -102,10 +127,11 @@ Examples below assume `discrawl` is on `PATH`. If you built from source without
## Quick Start
Reuse an existing OpenClaw Discord bot config:
Configure a Discord bot token and refresh both bot-visible guild data and local desktop cache data:
```bash
discrawl init --from-openclaw ~/.openclaw/openclaw.json
export DISCORD_BOT_TOKEN="..."
discrawl init
discrawl doctor
discrawl sync --full
discrawl sync
@ -113,26 +139,12 @@ discrawl search "panic: nil pointer"
discrawl tail
```
Multi-account OpenClaw setup:
```bash
discrawl init --from-openclaw ~/.openclaw/openclaw.json --account atlas
```
Env-only setup:
```bash
export DISCORD_BOT_TOKEN="..."
discrawl doctor
discrawl init
discrawl sync --full
discrawl sync
```
Use `discrawl sync --source wiretap` when you only want the local Discord Desktop cache import and do not want bot-token API sync.
Git-only reader setup:
```bash
discrawl subscribe https://github.com/openclaw/discord-backup.git
discrawl subscribe https://github.com/example/discord-archive.git
discrawl search "launch checklist"
discrawl messages --channel general --hours 24
```
@ -150,51 +162,94 @@ discrawl messages --channel general --hours 24
## Commands
### `tui`
Opens the local terminal archive browser for stored messages.
```bash
discrawl tui
discrawl tui --guild 123456789012345678 --channel general
discrawl tui --dm
discrawl --json tui --limit 50
```
The terminal browser uses the shared crawlkit explorer. The left pane groups
channels, people, or threads; the middle pane lists messages; the right pane
shows the selected message, surrounding conversation, and thread detail. Mouse
selection, right-click actions, sortable headers, and the local/remote footer
follow the same interaction model as `gitcrawl tui`. See
[`docs/commands/tui.md`](docs/commands/tui.md) for flags and read-only/DM scope
notes.
### `init`
Creates the local config and discovers accessible guilds.
```bash
discrawl init
discrawl init --from-openclaw ~/.openclaw/openclaw.json
discrawl init --from-openclaw ~/.openclaw/openclaw.json --account atlas
discrawl init --guild 123456789012345678
discrawl init --db ~/data/discrawl.db
```
When OpenClaw config tokens use `${ENV_VAR}` placeholders, `init` and `doctor` resolve them before auth.
### `sync`
Refreshes guild state into SQLite. Run one explicit `--full` pass when you want a complete historical archive; use plain `sync` afterward for frequent latest-message refreshes.
Refreshes SQLite from one or both archive sources.
By default, `sync` runs both live/local sources and does not import the Git snapshot first:
- Discord bot-token sync for bot-visible guild data
- local Discord Desktop cache import for classifiable cached messages and proven DMs
Use `discrawl update` when you want to pull/import the shared Git snapshot. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` to import only when stale or `--update=force` to pull/import before syncing. `--no-update` is accepted as an explicit no-op alias for the default.
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
```bash
discrawl sync
discrawl sync --update=auto
discrawl sync --update=force
discrawl sync --no-update
discrawl sync --full
discrawl sync --full --all
discrawl sync --guild 123456789012345678
discrawl sync --guilds 123,456 --concurrency 8
discrawl sync --source both # default: bot API + desktop cache
discrawl sync --source discord # bot API only; aliases: key, bot, api
discrawl sync --source wiretap # desktop cache only; aliases: desktop, cache
discrawl sync --guild 123456789012345678 --all-channels
discrawl sync --channels 111,222 --since 2026-03-01T00:00:00Z
```
Sync modes:
Sync sources:
| Source | Reads from | Stores |
| --- | --- | --- |
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
| `discord` / `key` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
| `wiretap` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
Sync modes control the Discord bot API side of a run. When `wiretap` is selected, the desktop cache import runs once alongside the chosen bot sync mode.
Bot sync modes:
| Command | Use when | Behavior |
| --- | --- | --- |
| `discrawl sync` | routine refresh | imports any stale Git snapshot first, skips member refreshes, checks live top-level channels plus active threads, and only fetches new messages for channels with a stored latest cursor |
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, and only fetches new messages for channels with a stored latest cursor |
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, then runs the routine live refresh |
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
`sync` already uses parallel channel workers. `--concurrency` overrides the default, and the default is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`.
`sync` already uses parallel channel workers for bot API message crawling.
`--concurrency` overrides the default, which is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`.
`--all` ignores `default_guild_id` and fans out across every discovered guild the bot can access.
`--skip-members` refreshes guild/channel/message data without crawling the full member list, which is useful for frequent Git snapshot publishers that only need latest messages.
`--latest-only` is still accepted for explicit latest-only runs; it is now the default for untargeted `sync`. Use `--all-channels` to opt out of the fast default without doing a full historical crawl.
When `--channels` includes a forum channel id, `discrawl` expands that forum's threads and syncs their messages as part of the targeted run.
`--since` limits initial history/bootstrap and full-history backfill to messages at or after the given RFC3339 timestamp. It does not mark older history as complete, so a later `sync --full` without `--since` can continue the backfill.
Long runs now emit periodic progress logs to stderr so large backfills do not look hung.
Long runs now emit periodic progress logs to stderr so large backfills and Git snapshot imports do not look hung.
If in-flight channels stop completing for a while, `discrawl` now emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters, and every run ends with a `message sync finished` summary.
Each channel crawl also has a bounded runtime budget, so a pathological channel is deferred and retried on the next sync instead of pinning a worker forever.
Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl, and marker cleanup is best-effort so one missing local sync-state row cannot crash the run.
Full sync member refresh is best-effort: when the caller supplies no deadline, it currently gives up after five minutes, so message sync completion is not held hostage by a slow guild member crawl.
When the archive is already complete, `sync --full` now reuses the stored backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
@ -209,6 +264,35 @@ discrawl tail --guild 123456789012345678
discrawl tail --repair-every 30m
```
### `wiretap`
Imports classifiable Discord Desktop message payloads into the same local SQLite archive.
This is the path for searchable DMs because bot tokens cannot read personal direct messages.
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
```bash
discrawl wiretap
discrawl wiretap --path "$HOME/Library/Application Support/discord"
discrawl wiretap --dry-run
discrawl wiretap --full-cache
discrawl wiretap --watch-every 2m
```
Notes:
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
- stores proven DMs under the synthetic guild id `@me`
- keeps `@me` rows local-only: `publish`, Git snapshot import/export, and optional embedding snapshot export exclude DM guilds, channels, messages, events, attachments, mentions, wiretap sync state, and vectors for DM messages
- preserves existing local `@me` guilds, channels, messages, and attachments when importing a Git snapshot, so a shared guild mirror refresh does not wipe local wiretap DM search
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
- imports what Discord Desktop has cached locally, not complete live DM history
- scans local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON, plus route-bearing Chromium HTTP cache entries by default
- use `--full-cache` or `desktop.full_cache = true` for exhaustive Chromium cache import when you want slower historical guild-cache archaeology
- does not extract, store, or print Discord auth tokens
- `--max-file-bytes` skips unusually large files; default is 64 MiB
### `search`
Searches archived messages. FTS is the default mode and works without embeddings.
@ -219,6 +303,7 @@ discrawl search --mode fts "panic: nil pointer"
discrawl search --mode semantic "missing launch checklist"
discrawl search --mode hybrid "database timeout"
discrawl search --guild 123456789012345678 "payment failed"
discrawl search --dm "launch checklist"
discrawl search --channel billing --author steipete --limit 50 "invoice"
discrawl search --include-empty "GitHub"
discrawl --json search "websocket closed"
@ -232,6 +317,8 @@ Modes:
- `semantic` embeds the query, searches locally stored message vectors, and returns a clear error if embeddings are disabled or no compatible vectors exist.
- `hybrid` runs FTS and semantic search, deduplicates by message id, and falls back to FTS when semantic search is unavailable.
FTS uses SQLite FTS5 with the default `unicode61` tokenizer. User query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators. Punctuation still follows FTS5 tokenization rules.
Semantic and hybrid search require `[search.embeddings]` plus local `message_embeddings` rows for the configured provider, model, and input version. Run `discrawl sync --with-embeddings` to enqueue changed messages, then `discrawl embed` to generate vectors. The input version is currently `message_normalized_v1`, so vectors are tied to normalized message text rather than raw Discord payloads.
### `messages`
@ -244,6 +331,7 @@ discrawl messages --channel maintainers --hours 6 --all
discrawl messages --channel "#maintainers" --since 2026-03-01T00:00:00Z
discrawl messages --channel 1456744319972282449 --author steipete --limit 50
discrawl messages --channel maintainers --last 100 --sync
discrawl messages --dm --channel Molty --last 20
discrawl messages --channel maintainers --days 7 --all --include-empty
discrawl --json messages --channel maintainers --days 3
```
@ -258,6 +346,21 @@ Notes:
- `--sync` runs a blocking pre-query sync for the matching channel or guild scope before reading the local DB
- rows with no displayable/searchable content are skipped by default; `--include-empty` opts back in
- at least one filter is required
- `--dm` is shorthand for `--guild @me`, so DM searches and message slices do not need raw SQL
### `dms`
Lists local wiretap DM conversations or reads one DM thread.
```bash
discrawl dms
discrawl dms --with Molty --last 20
discrawl dms --with 1456464433768300635 --all
discrawl dms --search "launch checklist"
discrawl dms --with Molty --search "invoice"
```
`discrawl dms` shows one row per local DM channel with message count, author count, and first/last cached message times. Passing `--with` switches to message output for that DM conversation unless `--list` is also set. `--search` searches only local DM messages. This is a convenience layer over the local-only synthetic guild id `@me`; it skips Git snapshot auto-update because DMs are never imported from the shared mirror, and it still only sees Discord Desktop cache data imported by `wiretap`.
### `mentions`
@ -361,14 +464,14 @@ discrawl status
Publisher:
```bash
discrawl publish --remote https://github.com/openclaw/discord-backup.git --push
discrawl publish --remote https://github.com/example/discord-archive.git --push
discrawl publish --readme path/to/discord-backup/README.md --push
```
Subscriber:
```bash
discrawl subscribe https://github.com/openclaw/discord-backup.git
discrawl subscribe https://github.com/example/discord-archive.git
discrawl search "launch checklist"
discrawl messages --channel general --hours 24
```
@ -378,25 +481,25 @@ discrawl messages --channel general --hours 24
Configure freshness:
```bash
discrawl subscribe --stale-after 15m https://github.com/openclaw/discord-backup.git
discrawl subscribe --no-auto-update https://github.com/openclaw/discord-backup.git
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
```
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). `discrawl update` forces the same pull/import step manually.
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). Imports are planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests, so routine updates normally read only changed tail shards and preserve local FTS rows instead of rebuilding the whole archive. `discrawl update` forces the same pull/import step manually. `discrawl sync` does not auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync` and `discrawl messages --sync` import the Git snapshot first, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync --update=auto` and `discrawl messages --sync` import the Git snapshot first, usually as a changed-shard delta, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
Git snapshots publish archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
Git snapshots publish non-DM archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
Generated vectors can be backed up explicitly:
```bash
discrawl publish --with-embeddings --push
discrawl subscribe --with-embeddings https://github.com/openclaw/discord-backup.git
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
discrawl update --with-embeddings
```
`--with-embeddings` exports stored `message_embeddings` rows for the configured `[search.embeddings]` provider/model plus the current input version. The snapshot stores those vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors into semantic search. `embedding_jobs` is never exported; subscribers that want fresh local vectors can run `discrawl embed --rebuild` to create their own queue and vectors.
`--with-embeddings` exports stored `message_embeddings` rows for the configured `[search.embeddings]` provider/model plus the current input version. The snapshot stores those vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Only vectors for non-DM messages are exported. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors into semantic search. `embedding_jobs` is never exported; subscribers that want fresh local vectors can run `discrawl embed --rebuild` to create their own queue and vectors. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`:
@ -415,16 +518,47 @@ discrawl report --readme path/to/discord-backup/README.md
Every scheduled snapshot publish updates deterministic README stats: latest update time, latest archived message, archive totals, and day/week/month activity.
The backup README field notes are intentionally a separate daily workflow, not part of `discrawl report`, so model latency or quota cannot block the 15-minute data publish path. `.github/workflows/discord-backup-report.yml` installs `openclaw@latest`, runs `openclaw agent --local` with OpenAI, and inserts a separate `discrawl-field-notes` block with:
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
- what people seem to love
- what people complain about
- complaint topics correlated with recent GitHub issue and PR clusters
- the likely best PR to watch
### `digest`
Configure `OPENAI_API_KEY` in the discrawl repo secrets to enable agent-written field notes. `DISCORD_BACKUP_TOKEN` still needs write access to `openclaw/discord-backup`. If the GitHub repo used for issue/PR correlation is private, also set `DISCORD_FIELD_NOTES_GITHUB_TOKEN` with read access to that repo.
Summarizes per-channel activity for a lookback window.
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, `discrawl update` compares the cached DB's last imported snapshot timestamp with `manifest.json` and skips the full sharded import when they match. Cache misses and newer backup manifests still take the normal pull/import path.
```bash
discrawl digest
discrawl digest --since 30d
discrawl digest --guild 123456789012345678
discrawl digest --channel general
discrawl --json digest --since 7d --top-n 5
```
Notes:
- `--since` accepts Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
- `--guild` scopes to one guild; when omitted, `default_guild_id` is used if configured
- `--channel` accepts a channel id or exact channel name
- `--top-n` controls how many top posters and mention targets are shown per channel
### `analytics`
Groups activity-style queries under one namespace.
```bash
discrawl analytics
discrawl analytics quiet --since 30d
discrawl analytics quiet --guild 123456789012345678
discrawl analytics trends --weeks 8
discrawl analytics trends --weeks 12 --channel general
discrawl --json analytics quiet --since 60d
discrawl --json analytics trends --weeks 4
```
Notes:
- `analytics quiet` shows top-level text/announcement channels with no messages in the lookback window, including never-active channels
- `analytics quiet --guild` scopes the report to one guild; when omitted, `default_guild_id` is used if configured
- `analytics trends` shows Monday-start UTC weekly message counts per message-capable channel
- `analytics trends --channel` accepts a channel id or exact channel name
### `doctor`
@ -449,17 +583,23 @@ cache_dir = "~/.discrawl/cache"
log_dir = "~/.discrawl/logs"
[discord]
token_source = "openclaw" # use "none" for Git-only read access
openclaw_config = "~/.openclaw/openclaw.json"
account = "default"
token_source = "env" # use "none" for Git-only read access
token_env = "DISCORD_BOT_TOKEN"
token_keyring_service = "discrawl"
token_keyring_account = "discord_bot_token"
[sync]
source = "both" # use "discord" for bot-only sync or "wiretap" for desktop-cache-only import
concurrency = 16
repair_every = "6h"
full_history = true
attachment_text = true
[desktop]
path = "~/.config/discord" # macOS default: "~/Library/Application Support/discord"
max_file_bytes = 67108864
full_cache = false
[search]
default_mode = "fts"
@ -484,7 +624,8 @@ Config override rules:
- `--config` beats everything
- `DISCRAWL_CONFIG` overrides the default config path
- `discord.token_source = "env"` forces env-only token lookup
- `discord.token_source = "none"` disables live Discord access for Git-only readers
- `discord.token_source = "keyring"` skips env lookup and reads only the configured OS keyring item
- `DISCRAWL_NO_AUTO_UPDATE=1` disables Git snapshot auto-update for read commands in one process, useful for report jobs that already imported a fresh backup.
## Embeddings
@ -538,6 +679,10 @@ With remote providers, message text is sent during `discrawl embed`, and search
- FTS index rows
- optional local embedding queue metadata and vectors
Messages imported from Discord Desktop use the same message, attachment, mention, and FTS paths as bot-synced messages.
Proven DMs use `@me` as their guild id. Unclassifiable desktop-cache payloads are skipped instead of being stored as unknown synthetic data.
SQLite schema migrations are versioned with `PRAGMA user_version`. Startup fails fast when the local DB schema is newer than the running binary supports.
Attachment binaries are not stored in SQLite.
@ -548,6 +693,7 @@ Set `sync.attachment_text = false` if you want to keep attachment metadata and f
- do not commit bot tokens or API keys
- default config lives in your home directory, not inside the repo
- prefer env vars or the OS keyring for bot tokens
- CI runs secret scanning with `gitleaks`
- `doctor` reports token source, not token contents
@ -560,9 +706,10 @@ go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
go test ./... -coverprofile=/tmp/discrawl.cover
go tool cover -func=/tmp/discrawl.cover | tail -n 1
go build ./cmd/discrawl
go run ./cmd/discrawl help | grep tui
```
Target coverage is `>= 80%`.
Target coverage is `>= 85%`.
CI runs:

121
SPEC.md
View File

@ -6,6 +6,7 @@ Goal:
- build a local-first Discord guild crawler
- mirror all guild data the configured bot can access
- import classifiable Discord Desktop cache messages without user tokens, including DMs
- store it in SQLite
- support fast text search, semantic search, and raw SQL
- support one-shot backfill and long-running live sync
@ -26,6 +27,7 @@ V1 scope:
- all accessible private threads
- archived thread coverage
- full message history
- desktop-local import from cached Discord Desktop artifacts, with proven DMs stored under `@me`
- current member snapshot
- FTS5 search
- optional OpenAI embeddings with local vector search
@ -33,7 +35,8 @@ V1 scope:
Out of scope for V1:
- personal-account DMs
- remote/API personal-account DM crawling
- Discord user-token automation/selfbot flows
- reactions as primary indexed entities
- attachment blob downloads by default
- cross-guild unified sync UX
@ -48,7 +51,7 @@ These are settled unless the user explicitly changes them:
- DB location: `~/.discrawl/discrawl.db`
- cache dir: `~/.discrawl/cache/`
- log dir: `~/.discrawl/logs/`
- token source: reuse Molty / existing OpenClaw Discord bot config
- token source: `DISCORD_BOT_TOKEN` or configured env var, then optional OS keyring fallback
- guild model: one guild in CLI UX, multi-guild-ready schema
- search: hybrid, with FTS first and embeddings optional
- embedding provider: OpenAI
@ -68,33 +71,12 @@ An agent should assume:
- Go is installed and modern
- user is Peter
- user keeps many secrets in `~/.profile`
- an existing OpenClaw install may already contain usable Discord bot config
### Key file paths
- `~/.discrawl/config.toml`
- `~/.discrawl/discrawl.db`
- `~/.profile`
- `~/.openclaw/openclaw.json`
- `~/.openclaw/openclaw.json.bak*`
### Existing bot config
The current bot token source is expected in:
- `~/.openclaw/openclaw.json`
Expected path inside JSON:
- `channels.discord.token`
Expected guild selection path:
- `channels.discord.guilds`
The current intended default mode is:
- `discrawl init --from-openclaw ~/.openclaw/openclaw.json`
### OpenAI embeddings key
@ -118,6 +100,8 @@ Important Discord facts that drive the schema:
- forum posts are threads under a forum parent
- message history is paginated and must be backfilled incrementally
- live updates come from Gateway events, not from polling alone
- personal DMs are only supported through desktop-local cache import
- desktop cache messages without a provable channel/guild route are skipped rather than stored as unknown data
- archived public and private threads must be enumerated explicitly
- private archived thread access may require elevated bot perms like `Manage Threads`
@ -410,6 +394,7 @@ discrawl [global flags] <command> [args]
- `init`
- `sync`
- `tail`
- `wiretap`
- `search`
- `sql`
- `members`
@ -422,12 +407,11 @@ discrawl [global flags] <command> [args]
Purpose:
- create `~/.discrawl/config.toml`
- import defaults from OpenClaw
- discover accessible Discord guilds
- persist guild id and DB path
Expected flags:
- `--from-openclaw <path>`
- `--guild <id>`
- `--db <path>`
- `--with-embeddings`
@ -468,6 +452,29 @@ Requirements:
- write checkpoints
- periodic repair sync
### `wiretap`
Purpose:
- import Discord Desktop cache artifacts into the local archive
- make cached personal DMs searchable under synthetic guild id `@me`
Expected flags:
- `--path <dir>`
- `--dry-run`
- `--watch-every <duration>`
- `--max-file-bytes <bytes>`
- `--full-cache`
Requirements:
- never use Discord user tokens
- never extract or persist auth tokens from desktop cache
- scan bounded local files only
- default to route-bearing HTTP cache entries; exhaustive Chromium cache scans require explicit full-cache mode
- store sanitized raw metadata, not full arbitrary cache blobs
### `search`
Purpose:
@ -529,7 +536,7 @@ Must show:
Must check:
- config file readable
- OpenClaw token source readable
- Discord token env var readable unless live access is disabled
- Discord auth valid
- guild reachable
- DB openable
@ -556,9 +563,10 @@ cache_dir = "~/.discrawl/cache"
log_dir = "~/.discrawl/logs"
[discord]
token_source = "openclaw"
openclaw_config = "~/.openclaw/openclaw.json"
channel_account = "discord"
token_source = "env"
token_env = "DISCORD_BOT_TOKEN"
token_keyring_service = "discrawl"
token_keyring_account = "discord_bot_token"
[sync]
concurrency = 4
@ -585,6 +593,7 @@ Config precedence:
Environment variables:
- `DISCRAWL_CONFIG`
- `DISCORD_BOT_TOKEN`
- `OPENAI_API_KEY`
## Token Handling Rules
@ -597,7 +606,8 @@ Do not:
Do:
- load bot token from OpenClaw config path
- load bot token from env
- fall back to the configured OS keyring item when env is empty
- load OpenAI key from env
- redact secrets in debug and doctor output
@ -781,3 +791,54 @@ For an AI agent to finish the product without external memory, this repo should
- milestone order
This file is the authoritative engineering spec for now.
## Digest
`discrawl digest` provides a per-channel activity summary over a lookback window.
Example usage:
```bash
discrawl digest
discrawl digest --since 7d
discrawl digest --since 30d --guild 123456789012345678
discrawl digest --channel general --top-n 5
discrawl --json digest --since 72h
```
Behavior:
- window defaults to `7d` when `--since` is omitted
- `--since` accepts Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
- `--guild` filters by `guild_id`; empty means no guild filter
- `--channel` accepts channel id or exact channel name
- per-channel metrics include `messages`, `replies`, and `active_authors`
- top posters are ranked by message count using member display fallback order: `display_name -> nick -> global_name -> username -> author_id -> unknown`
- top mentions are ranked from `mention_events` and include all target types (`user` and `role`)
- channels are sorted by message count descending, then channel name ascending
- JSON output returns a `Digest` object with channel rows and totals; plain output emits one tab-separated row per channel
## Analytics
`discrawl analytics` is a subcommand group for activity-style queries.
Example usage:
```bash
discrawl analytics
discrawl analytics quiet --since 30d
discrawl analytics quiet --guild 123456789012345678
discrawl analytics trends --weeks 8
discrawl analytics trends --weeks 12 --channel general
discrawl --json analytics quiet --since 60d
discrawl --json analytics trends --weeks 4
```
Behavior:
- `analytics quiet` defaults to `30d` lookback and supports `--guild`
- `analytics quiet` includes top-level text/announcement channels with no messages at all
- quiet rows are sorted with never-active channels first, then by longest silence
- `analytics trends` defaults to `8` weeks and supports `--guild` plus `--channel` (id or exact name)
- `analytics trends` buckets messages into Monday-start UTC weeks and zero-fills missing weeks for every returned message-capable channel
- trends rows are sorted by total messages descending, then channel name ascending

View File

@ -4,12 +4,17 @@ import (
"context"
"fmt"
"os"
"os/signal"
"syscall"
"github.com/steipete/discrawl/internal/cli"
"github.com/openclaw/discrawl/internal/cli"
)
func main() {
if err := cli.Run(context.Background(), os.Args[1:], os.Stdout, os.Stderr); err != nil {
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
err := cli.Run(ctx, os.Args[1:], os.Stdout, os.Stderr)
stop()
if err != nil {
fmt.Fprintln(os.Stderr, err.Error())
os.Exit(cli.ExitCode(err))
}

View File

@ -1,10 +1,18 @@
package main
import (
"bytes"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"syscall"
"testing"
"time"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
func TestMainHelpAndVersion(t *testing.T) {
@ -27,7 +35,7 @@ func TestMainHelpAndVersion(t *testing.T) {
if err != nil {
t.Fatalf("os.Executable: %v", err)
}
cmd := exec.Command(exe, "-test.run=TestMainHelpAndVersion")
cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainHelpAndVersion")
cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_ERROR=1")
err = cmd.Run()
var exitErr *exec.ExitError
@ -38,3 +46,137 @@ func TestMainHelpAndVersion(t *testing.T) {
}
t.Fatalf("expected exit code 2, got %v", err)
}
// TestMainCancelsWatchOnSIGTERM verifies that a watching `wiretap` run stops
// when the process receives SIGTERM.
//
// The test re-executes its own binary. The parent branch spawns a child with
// DISCRAWL_MAIN_SIGNAL_CHILD=1; the child branch writes a throwaway config,
// swaps os.Args for a `wiretap --dry-run --watch-every 1s` invocation, sends
// SIGTERM to itself shortly afterwards, and calls main().
//
// The parent accepts either a clean exit or an exit that
// isContextCanceledExit recognizes; any other error fails the test.
func TestMainCancelsWatchOnSIGTERM(t *testing.T) {
	if os.Getenv("DISCRAWL_MAIN_SIGNAL_CHILD") == "1" {
		// Child branch: keep every path the config references inside a temp dir.
		dir := t.TempDir()
		cfgPath := filepath.Join(dir, "config.toml")
		cfg := config.Default()
		cfg.DBPath = filepath.Join(dir, "discrawl.db")
		cfg.CacheDir = filepath.Join(dir, "cache")
		cfg.LogDir = filepath.Join(dir, "logs")
		cfg.Desktop.Path = filepath.Join(dir, "discord")
		requireNoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
		requireNoError(t, config.Write(cfgPath, cfg))

		// main() reads os.Args directly, so swap them for the duration of the call.
		oldArgs := os.Args
		t.Cleanup(func() { os.Args = oldArgs })
		os.Args = []string{"discrawl", "--config", cfgPath, "wiretap", "--dry-run", "--watch-every", "1s"}

		// Deliver SIGTERM to this process a moment after main() has started.
		go func() {
			time.Sleep(50 * time.Millisecond)
			process, err := os.FindProcess(os.Getpid())
			if err == nil {
				_ = process.Signal(syscall.SIGTERM)
			}
		}()
		main()
		return
	}

	// Parent branch: run this same test in a child process and inspect how it exits.
	exe, err := os.Executable()
	if err != nil {
		t.Fatalf("os.Executable: %v", err)
	}
	cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainCancelsWatchOnSIGTERM")
	cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_SIGNAL_CHILD=1")
	output, err := cmd.CombinedOutput()
	if isContextCanceledExit(err, output) {
		return
	}
	if err != nil {
		// Include the child's output so a failure can be diagnosed, matching
		// the failure messages of the sibling SIGTERM import test.
		t.Fatalf("expected graceful SIGTERM cancellation, got err=%v output=%s", err, output)
	}
}
// TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB checks that a SIGTERM
// delivered during a wiretap import ends the child process with a
// context-canceled exit and leaves a database that still passes
// `pragma quick_check`.
//
// The test re-executes its own binary: when DISCRAWL_MAIN_IMPORT_SIGNAL_DIR is
// set this process is the child and runs the import; otherwise it is the parent
// that spawns the child and inspects the result.
func TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB(t *testing.T) {
if dir := os.Getenv("DISCRAWL_MAIN_IMPORT_SIGNAL_DIR"); dir != "" {
// Child branch: run the import inside the directory chosen by the parent.
runWiretapImportSignalChild(t, dir)
return
}
// The parent creates the directory and hands it to the child through the
// environment so it can open the resulting database afterwards.
dir := t.TempDir()
exe, err := os.Executable()
if err != nil {
t.Fatalf("os.Executable: %v", err)
}
cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB")
cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_IMPORT_SIGNAL_DIR="+dir)
output, err := cmd.CombinedOutput()
// Only a context-canceled exit is accepted here; a clean exit also fails the test.
if !isContextCanceledExit(err, output) {
t.Fatalf("expected context-canceled exit from SIGTERM, got err=%v output=%s", err, output)
}
// Reopen the database the child was writing to and check its integrity.
ctx := t.Context()
s, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
if err != nil {
t.Fatalf("open db after SIGTERM: %v output=%s", err, output)
}
defer func() { _ = s.Close() }()
_, rows, err := s.ReadOnlyQuery(ctx, "pragma quick_check")
if err != nil {
t.Fatalf("quick_check after SIGTERM: %v output=%s", err, output)
}
// The check passes only when the result is exactly one row holding the single value "ok".
if len(rows) != 1 || len(rows[0]) != 1 || rows[0][0] != "ok" {
t.Fatalf("quick_check after SIGTERM = %#v output=%s", rows, output)
}
}
// runWiretapImportSignalChild is the child-process half of
// TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB. It writes a config
// and a large fake desktop cache under dir, points os.Args at a `wiretap`
// import, schedules a SIGTERM to its own process, and then calls main().
func runWiretapImportSignalChild(t *testing.T, dir string) {
t.Helper()
// Keep the config, database, cache, logs, and fake Discord Desktop data under dir.
cfgPath := filepath.Join(dir, "config.toml")
cfg := config.Default()
cfg.DBPath = filepath.Join(dir, "discrawl.db")
cfg.CacheDir = filepath.Join(dir, "cache")
cfg.LogDir = filepath.Join(dir, "logs")
cfg.Desktop.Path = filepath.Join(dir, "discord")
// NOTE(review): presumably turns off live Discord access and share
// auto-update so only the local import runs; confirm against the config package.
cfg.Discord.TokenSource = "none"
cfg.Share.AutoUpdate = false
cachePath := filepath.Join(cfg.Desktop.Path, "Local Storage", "leveldb")
requireNoError(t, os.MkdirAll(cachePath, 0o755))
requireNoError(t, config.Write(cfgPath, cfg))
// NOTE(review): 50000 messages is presumably enough for the import to still be
// running when the signal arrives; revisit if this test turns flaky.
writeLargeWiretapCache(t, filepath.Join(cachePath, "000001.log"), 50000)
// main() reads os.Args directly, so swap them for the duration of the call.
oldArgs := os.Args
t.Cleanup(func() { os.Args = oldArgs })
os.Args = []string{"discrawl", "--config", cfgPath, "wiretap", "--path", cfg.Desktop.Path}
// Deliver SIGTERM to this process shortly after main() has started.
go func() {
time.Sleep(15 * time.Millisecond)
process, err := os.FindProcess(os.Getpid())
if err == nil {
_ = process.Signal(syscall.SIGTERM)
}
}()
main()
}
// writeLargeWiretapCache creates the file at path and fills it with
// newline-delimited JSON: one channel record followed by count message records
// that all reference that channel. Message ids and contents are derived from
// the message index. Any I/O error fails the test immediately.
func writeLargeWiretapCache(t *testing.T, path string, count int) {
	t.Helper()

	const channelRecord = `{"id":"111111111111111117","guild_id":"999999999999999997","type":0,"name":"sigterm-import"}`
	const messageRecord = `{"id":"3333333333%09d","channel_id":"111111111111111117","content":"sigterm import message %d","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222228","username":"alice"}}` + "\n"

	out, createErr := os.Create(path)
	requireNoError(t, createErr)
	defer func() {
		requireNoError(t, out.Close())
	}()

	// The channel record comes first, ahead of the messages that reference it.
	_, headErr := fmt.Fprintln(out, channelRecord)
	requireNoError(t, headErr)

	for seq := 0; seq < count; seq++ {
		_, writeErr := fmt.Fprintf(out, messageRecord, seq, seq)
		requireNoError(t, writeErr)
	}
}
// isContextCanceledExit reports whether err is (or wraps) an *exec.ExitError
// with exit code 1 and output contains the text "context canceled".
func isContextCanceledExit(err error, output []byte) bool {
	var procErr *exec.ExitError
	if !errors.As(err, &procErr) {
		return false
	}
	if procErr.ExitCode() != 1 {
		return false
	}
	return bytes.Contains(output, []byte("context canceled"))
}
// requireNoError stops the calling test with t.Fatal when err is non-nil and
// does nothing otherwise. It is marked as a helper so failures are reported at
// the caller's line.
func requireNoError(t *testing.T, err error) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatal(err)
}

1
docs/CNAME Normal file
View File

@ -0,0 +1 @@
discrawl.sh

46
docs/README.md Normal file
View File

@ -0,0 +1,46 @@
# Discrawl
Mirror Discord guilds into local SQLite. Search server history without depending on Discord search. Bring a bot token, or read everything offline from a Git snapshot.
## What it does
- discovers every guild a bot can access and syncs channels, threads, members, and message history into SQLite
- maintains FTS5 indexes for fast literal search; optional embeddings for semantic and hybrid recall
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
- tails the Gateway for live updates with periodic repair sweeps
- publishes the archive as sharded NDJSON snapshots in a private Git repo so readers can search offline with no Discord credentials
- exposes read-only SQL, channel/member directories, mention queries, digests, and trend analytics
## Pick your path
- **New here?** Read [Install](install.html) and run `discrawl init`.
- **Already have a bot?** Jump to [`sync`](commands/sync.html) and [`search`](commands/search.html).
- **Just want to read a shared archive?** Use [`subscribe`](commands/subscribe.html) - no token needed.
- **Need DM search?** [`wiretap`](commands/wiretap.html) imports local Discord Desktop cache.
- **Want semantic search?** Configure [Embeddings](guides/embeddings.html), then run [`embed`](commands/embed.html).
- **Wiring an agent or launcher?** `discrawl metadata --json`, `discrawl status --json`, and `discrawl doctor --json` expose the read-only crawlkit control surface.
## At a glance
```bash
export DISCORD_BOT_TOKEN="..."
discrawl init
discrawl doctor
discrawl sync --full
discrawl search "panic: nil pointer"
discrawl tail
```
[`discrawl tui`](commands/tui.html) uses the shared crawlkit terminal explorer:
channel/person/thread groups on the left, message rows in the middle, and
readable message/thread detail on the right.
## Sections
- **[Start](install.html)** - install, configure, set up the Discord bot, security notes, contact
- **[Guides](guides/)** - sync sources, wiretap internals, search modes, embeddings, Git snapshots, data layout
- **[Commands](commands/)** - one page per CLI command
## Where to file issues
`https://github.com/openclaw/discrawl/issues`. See [contact](contact.html) for project links.

View File

@ -7,7 +7,7 @@ summary: "Release checklist for discrawl (GitHub release binaries via GoReleaser
Always do all steps below. No partial releases.
Assumptions:
- Repo: `steipete/discrawl`
- Repo: `openclaw/discrawl`
- Binary: `discrawl`
- GoReleaser config: `.goreleaser.yaml`
- Homebrew tap repo: `~/Projects/homebrew-tap`
@ -22,14 +22,15 @@ Assumptions:
## 1) Verify build + tests
```sh
go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.5.0 run
go test ./... -coverprofile=coverage.out
go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
go test -count=1 ./... -coverprofile=coverage.out
go tool cover -func=coverage.out | tail -n 1
go test -count=1 -race ./...
go build -o /tmp/discrawl ./cmd/discrawl
gh run list -L 5 --branch main
```
Coverage floor: `80%+`
Coverage floor: `85%+`
## 2) Update changelog
@ -81,7 +82,7 @@ After tagging a real release:
Useful commands:
```sh
curl -L -o /tmp/discrawl-darwin-arm64.tgz https://github.com/steipete/discrawl/releases/download/vX.Y.Z/discrawl_X.Y.Z_darwin_arm64.tar.gz
curl -L -o /tmp/discrawl-darwin-arm64.tgz https://github.com/openclaw/discrawl/releases/download/vX.Y.Z/discrawl_X.Y.Z_darwin_arm64.tar.gz
shasum -a 256 /tmp/discrawl-darwin-arm64.tgz
brew uninstall discrawl || true
brew install steipete/tap/discrawl
@ -91,7 +92,7 @@ brew info steipete/tap/discrawl
## Notes
- Build-time version stamping comes from `-X github.com/steipete/discrawl/internal/cli.version={{ .Version }}`
- Build-time version stamping comes from `-X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}`
- If release workflow needs a rerun:
```sh

63
docs/bot-setup.md Normal file
View File

@ -0,0 +1,63 @@
# Discord bot setup
Discrawl needs a real Discord bot token, not a user token, to run `sync` or `tail`. The desktop `wiretap` import does not need any token.
## Minimum setup
1. Create or reuse a Discord application in the [Discord developer portal](https://discord.com/developers/applications).
2. Add a bot user to that application.
3. Invite the bot to the target guilds.
4. Enable these intents for the bot:
- **Server Members Intent**
- **Message Content Intent**
5. Ensure the bot can at least:
- view channels
- read message history
Without those intents/permissions, `sync`, `tail`, member snapshots, and message content archiving will be partial or fail outright.
## Provide the token
### Environment variable
```bash
export DISCORD_BOT_TOKEN="your-bot-token"
discrawl doctor
```
If you keep shell secrets in `~/.profile`, add the export there and reload your shell.
### OS keyring
If you prefer the OS keyring, keep the token out of config and store it in the default keyring item:
```bash
# macOS Keychain
security add-generic-password -U -s discrawl -a discord_bot_token -w "$DISCORD_BOT_TOKEN"
# Linux Secret Service / libsecret
printf %s "$DISCORD_BOT_TOKEN" | secret-tool store --label="discrawl Discord bot token" service discrawl username discord_bot_token
# Windows Credential Manager
cmdkey /generic:discrawl:discord_bot_token /user:discord_bot_token /pass:%DISCORD_BOT_TOKEN%
```
Set `discord.token_source = "keyring"` if you want to require the keyring and skip env entirely.
## Verify
```bash
discrawl doctor
```
`doctor` reports the token source (env or keyring), confirms bot auth, lists how many guilds the bot can access, and verifies the local DB plus FTS wiring. It does not print the token contents.
## Wiretap-only setup
If you only want to import local Discord Desktop cache messages and not run a bot, skip everything above and run:
```bash
discrawl sync --source wiretap
```
Or `discrawl wiretap` directly. See the [wiretap guide](guides/wiretap.html).

View File

@ -0,0 +1,37 @@
# `analytics`
Groups activity-style queries under one namespace.
## Usage
```bash
discrawl analytics
discrawl analytics quiet --since 30d
discrawl analytics quiet --guild 123456789012345678
discrawl analytics trends --weeks 8
discrawl analytics trends --weeks 12 --channel general
discrawl --json analytics quiet --since 60d
discrawl --json analytics trends --weeks 4
```
## Subcommands
### `quiet`
Top-level text/announcement channels with no messages in the lookback window, including never-active channels.
- `--since <duration>` - lookback window (e.g. `30d`, `60d`)
- `--guild <id>` - scope to one guild; when omitted, `default_guild_id` is used if configured
### `trends`
Monday-start UTC weekly message counts per message-capable channel.
- `--weeks <n>` - number of weeks to include
- `--channel <id|name>` - scope to one channel
- `--guild <id>` - scope to one guild
## See also
- [`digest`](digest.html)
- [`status`](status.html)

25
docs/commands/channels.md Normal file
View File

@ -0,0 +1,25 @@
# `channels`
Browse the offline channel directory.
## Usage
```bash
discrawl channels list
discrawl channels show 123456789012345678
```
## Subcommands
- `list` - dump every channel and thread in the local archive
- `show <id>` - show metadata for one channel/thread
## Notes
- threads are stored as channels because that matches the Discord model
- archived threads are part of the sync surface and appear here too
## See also
- [`members`](members.html)
- [Data layout](../guides/data-storage.html)

29
docs/commands/digest.md Normal file
View File

@ -0,0 +1,29 @@
# `digest`
Summarizes per-channel activity for a lookback window.
## Usage
```bash
discrawl digest
discrawl digest --since 30d
discrawl digest --guild 123456789012345678
discrawl digest --channel general
discrawl --json digest --since 7d --top-n 5
```
## Flags
- `--since <duration>` - Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
- `--guild <id>` - scope to one guild; when omitted, `default_guild_id` is used if configured
- `--channel <id|name>` - scope to one channel
- `--top-n <n>` - how many top posters and mention targets per channel
## Output
For each channel in scope: message count, top posters, top mention targets, first/last activity in window.
## See also
- [`analytics`](analytics.html)
- [`mentions`](mentions.html)

39
docs/commands/dms.md Normal file
View File

@ -0,0 +1,39 @@
# `dms`
Lists local wiretap DM conversations or reads one DM thread. Convenience layer over the synthetic `@me` guild id.
## Usage
```bash
discrawl dms
discrawl dms --with Molty --last 20
discrawl dms --with 1456464433768300635 --all
discrawl dms --search "launch checklist"
discrawl dms --with Molty --search "invoice"
```
## Default output
`discrawl dms` (no flags) shows one row per local DM channel with:
- message count
- author count
- first/last cached message times
## Flags
- `--with <name|id>` - switches to message output for that DM conversation (unless `--list` is also set)
- `--list` - keep the channel-summary listing even when `--with` is set
- `--search <query>` - search only local DM messages
- `--last <n>` / `--all` / `--limit <n>` - same slicing as [`messages`](messages.html)
## Notes
- only sees data imported by [`wiretap`](wiretap.html) - Discord Desktop cache, not live DM history
- skips Git snapshot auto-update because DMs are never imported from the shared mirror
- DMs are local-only and never published
## See also
- [Wiretap guide](../guides/wiretap.html)
- [`messages --dm`](messages.html)

35
docs/commands/doctor.md Normal file
View File

@ -0,0 +1,35 @@
# `doctor`
Checks config, auth, DB, and FTS wiring. The fastest sanity check.
## Usage
```bash
discrawl doctor
```
## What it verifies
- config loads from the expected path
- where the bot token was resolved from (env var or OS keyring)
- bot auth succeeds against Discord
- how many guilds the bot can access
- local SQLite database exists and the schema version matches the binary
- FTS5 index is wired up
## What it does not do
- does not print the token contents
- does not run a sync; it only checks readiness
## Common outputs
- "token from env (DISCORD_BOT_TOKEN)" or "token from keyring (discrawl/discord_bot_token)"
- "0 guilds visible" - bot is not invited to any guild yet, or intents/permissions are missing
- "schema newer than binary" - update `discrawl` to a build that supports the local DB schema
## See also
- [Bot setup](../bot-setup.html)
- [Configuration](../configuration.html)
- [`status`](status.html)

42
docs/commands/embed.md Normal file
View File

@ -0,0 +1,42 @@
# `embed`
Drains pending `embedding_jobs` rows by calling the configured embedding provider and writing vectors to `message_embeddings`.
## Usage
```bash
discrawl embed
discrawl embed --limit 1000
discrawl embed --rebuild --limit 1000
```
## Flags
- `--limit <n>` - cap how many jobs this run drains
- `--batch-size <n>` - provider request batch size
- `--rebuild` - regenerate vectors for the existing archive after a provider/model/input-version change
## Behavior
- claims jobs with a short lock so overlapping runs do not process the same batch
- rate limits requeue the batch and stop that drain run cleanly
- provider or validation failures retry up to three attempts before the job is marked failed
- messages with no normalized text are marked done and any stale vector for that message is removed
## Identity
Provider, model, and input version are stored on each job and vector. Changing any of them retargets pending jobs to the new identity and resets prior attempts. Existing vectors for another identity remain in SQLite but are not used by semantic search.
## When to use `--rebuild`
Use `--rebuild` after changing the `[search.embeddings]` provider, model, or any input setting, when you want to regenerate vectors for messages already in the archive.
## Pairing with `sync`
`sync --with-embeddings` enqueues; `embed` drains. The two phases are intentionally separate so a slow provider does not block the hot sync path.
## See also
- [Embeddings guide](../guides/embeddings.html)
- [Search modes](../guides/search-modes.html)
- [`search`](search.html)

31
docs/commands/init.md Normal file
View File

@ -0,0 +1,31 @@
# `init`
Creates the local config and discovers accessible guilds.
## Usage
```bash
discrawl init
discrawl init --guild 123456789012345678
discrawl init --db ~/data/discrawl.db
discrawl init --with-embeddings
```
## What it does
- writes `~/.discrawl/config.toml` (or whatever `--config` / `DISCRAWL_CONFIG` points to)
- discovers guilds the configured bot can access
- if exactly one guild is available, sets it as `default_guild_id` automatically
- creates the SQLite database at `db_path`
## Flags
- `--guild <id>` - set a specific default guild instead of auto-picking
- `--db <path>` - override `db_path`
- `--with-embeddings` - enable `[search.embeddings]` in the generated config
## See also
- [Configuration](../configuration.html)
- [Bot setup](../bot-setup.html)
- [`doctor`](doctor.html)

72
docs/commands/members.md Normal file
View File

@ -0,0 +1,72 @@
# `members`
Browse the offline member directory built from archived profile payloads.
## Usage
```bash
discrawl members list
discrawl members show 123456789012345678
discrawl members show --messages 10 steipete
discrawl members search "peter"
discrawl members search "github"
discrawl members search "https://github.com/steipete"
```
## Subcommands
- `list` - dump the local member directory
- `show <id|query>` - show one member; if the query resolves to one match, also show recent messages
- `search <query>` - match names plus any offline profile fields present in the archived member payload
## Flags
- `show --messages <n>` - include the most recent `n` messages from that member
## Profile fields
Extracted from the archived Discord member/user payload. May include:
- `bio`
- `pronouns`
- `location`
- `website`
- `x`
- `github`
- discovered URLs
If the bot cannot see a field from Discord, `discrawl` cannot invent it. This is strictly archive-based offline data.
## Typical workflow
```bash
discrawl sync --full
discrawl members search "design engineer"
discrawl members search "github"
discrawl members show --messages 25 steipete
discrawl messages --author steipete --days 30 --all
```
## Typical `members show` output
```text
guild=1456350064065904867
user=37658261826043904
username=steipete
display=Peter Steinberger
joined=2026-03-08T16:03:14Z
bot=false
x=steipete
github=steipete
website=https://steipete.me
bio=Builds native apps and tooling.
urls=https://steipete.me, https://github.com/steipete
message_count=1284
first_message=2026-02-01T09:00:00Z
last_message=2026-03-08T15:59:58Z
```
## See also
- [`channels`](channels.html)
- [Data layout](../guides/data-storage.html)

27
docs/commands/mentions.md Normal file
View File

@ -0,0 +1,27 @@
# `mentions`
Lists structured user and role mentions extracted during sync.
## Usage
```bash
discrawl mentions --channel maintainers --days 7
discrawl mentions --target steipete --type user --limit 50
discrawl mentions --target 1456406468898197625
discrawl --json mentions --type role --days 1
```
## Flags
- `--target <id|name>` - user or role id, exact name, or partial match
- `--type <user|role>` - filter by mention type
- `--channel <id|name>` - same channel matching as [`messages`](messages.html)
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
- `--days <n>` / `--since <RFC3339>` / `--before <RFC3339>` - time filters
- `--limit <n>` - cap result count
## Notes
- mentions are recorded structurally during sync, so this is a direct row read - no FTS parsing
- combine with `--type role` to find every mention of a role
- combine with `--target steipete` to find everywhere your account got pinged

41
docs/commands/messages.md Normal file
View File

@ -0,0 +1,41 @@
# `messages`
Lists exact message slices by channel, author, and time range. Unlike [`search`](search.html), this does not query the FTS index - it pulls a slice of rows.
## Usage
```bash
discrawl messages --channel maintainers --days 7 --all
discrawl messages --channel maintainers --hours 6 --all
discrawl messages --channel "#maintainers" --since 2026-03-01T00:00:00Z
discrawl messages --channel 1456744319972282449 --author steipete --limit 50
discrawl messages --channel maintainers --last 100 --sync
discrawl messages --dm --channel Molty --last 20
discrawl messages --channel maintainers --days 7 --all --include-empty
discrawl --json messages --channel maintainers --days 3
```
## Flags
- `--channel <id|name|#name>` - id, exact name, `#name`, or partial name match
- `--guild <id>` / `--guilds <id,id>` / `--dm` - restrict the guild scope (`--dm` is shorthand for `--guild @me`)
- `--author <name>` - restrict to one author
- `--hours <n>` - shorthand for "since now minus N hours"
- `--days <n>` - shorthand for "since now minus N days"
- `--since <RFC3339>` - explicit start timestamp
- `--last <n>` - return the newest `N` matching messages, then print oldest-to-newest
- `--limit <n>` - safety limit (default 200; `--all` removes it)
- `--all` - removes the safety limit
- `--sync` - blocking pre-query sync for the matching channel or guild scope
- `--include-empty` - include rows with no displayable/searchable content
## Notes
- at least one filter is required
- `--dm` skips Git snapshot auto-update because DMs are never imported from the shared mirror
- use either `--last` for the newest matching rows or `--all` for an uncapped oldest-to-newest slice
## See also
- [`search`](search.html)
- [`dms`](dms.html)

42
docs/commands/publish.md Normal file
View File

@ -0,0 +1,42 @@
# `publish`
Publishes the local SQLite archive as sharded, compressed NDJSON snapshots in a private Git repo.
## Usage
```bash
discrawl publish --remote https://github.com/example/discord-archive.git --push
discrawl publish --readme path/to/discord-backup/README.md --push
discrawl publish --message "sync: discord archive" --push
discrawl publish --with-embeddings --push
```
## Flags
- `--repo <path>` - local snapshot repo path (defaults to `[share].repo_path`)
- `--remote <url>` - target Git remote (defaults to `[share].remote`)
- `--branch <name>` - snapshot branch (defaults to `[share].branch`)
- `--message <text>` - commit message (default: `sync: discord archive`)
- `--no-commit` - write/export files without creating a Git commit
- `--push` - push the snapshot commit after writing it
- `--readme <path>` - update the activity block in this README file too
- `--with-embeddings` - also export stored `message_embeddings` rows
## What is published
- non-DM archive tables (DM `@me` rows are always excluded)
- README activity block (latest update, latest message, totals, day/week/month activity)
- with `--with-embeddings`: vectors for the configured `[search.embeddings]` provider/model/input version, plus identity manifest
## What is not published
- `@me` DM guilds, channels, messages, events, attachments, mentions, wiretap sync state
- `embedding_jobs`
- raw bot tokens or any local secret
## See also
- [Git snapshots guide](../guides/git-snapshots.html)
- [`subscribe`](subscribe.html)
- [`update`](update.html)
- [`report`](report.html)

35
docs/commands/report.md Normal file
View File

@ -0,0 +1,35 @@
# `report`
Generates the Markdown activity block used by the shared backup repo README.
## Usage
```bash
discrawl report
discrawl report --readme path/to/discord-backup/README.md
```
## Flags
- `--readme <path>` - update the activity block in the given README file in place
## What gets rendered
Deterministic README stats:
- latest update time
- latest archived message
- archive totals
- day / week / month activity
Every scheduled snapshot publish updates this block.
## CI integration
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
## See also
- [`publish`](publish.html)
- [Git snapshots](../guides/git-snapshots.html)
- [`status`](status.html)

51
docs/commands/search.md Normal file
View File

@ -0,0 +1,51 @@
# `search`
Searches archived messages. FTS is the default mode and works without embeddings.
## Usage
```bash
discrawl search "panic: nil pointer"
discrawl search --mode fts "panic: nil pointer"
discrawl search --mode semantic "missing launch checklist"
discrawl search --mode hybrid "database timeout"
discrawl search --guild 123456789012345678 "payment failed"
discrawl search --dm "launch checklist"
discrawl search --channel billing --author steipete --limit 50 "invoice"
discrawl search --include-empty "GitHub"
discrawl --json search "websocket closed"
```
## Modes
- `fts` (default) - SQLite FTS5 with `unicode61` tokenizer; newest matches first
- `semantic` - embeds the query, scores against locally stored vectors; errors out if embeddings are disabled or no compatible vectors exist
- `hybrid` - runs both, deduplicates by message id, falls back to FTS when semantic is unavailable
## Flags
- `--mode <fts|semantic|hybrid>` - search mode
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
- `--dm` - shorthand for `--guild @me`
- `--channel <id|name|#name>` - restrict to one channel (id, exact name, `#name`, or partial match)
- `--author <name>` - restrict to one author
- `--limit <n>` - cap result count
- `--include-empty` - include rows with no searchable content (attachment text/filenames, embeds, and replies still count as content)
## FTS behavior
User query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators. Punctuation still follows FTS5 tokenization rules.
## Semantic prerequisites
- `[search.embeddings]` configured in `~/.discrawl/config.toml`
- local `message_embeddings` rows for the configured provider, model, and input version
- input version is currently `message_normalized_v1`
Run `discrawl sync --with-embeddings` to enqueue, then `discrawl embed` to generate vectors.
## See also
- [Search modes](../guides/search-modes.html)
- [Embeddings](../guides/embeddings.html)
- [`messages`](messages.html) - exact slices, not search

25
docs/commands/sql.md Normal file
View File

@ -0,0 +1,25 @@
# `sql`
Runs read-only SQL against the local database.
## Usage
```bash
discrawl sql 'select count(*) as messages from messages'
echo 'select guild_id, count(*) from messages group by guild_id' | discrawl sql -
```
`-` reads SQL from stdin.
## Notes
- read-only - writes are blocked at the connection level
- `--unsafe --confirm` opens the escape hatch for deliberate write/admin SQL
- the schema is multi-guild ready; threads are stored as channels because that matches the Discord model
- proven DMs use the synthetic guild id `@me`
- SQLite schema migrations are versioned with `PRAGMA user_version`; startup fails fast when the local DB schema is newer than the binary supports
## See also
- [Data layout](../guides/data-storage.html) - what tables exist
- [`status`](status.html) - high-level archive numbers without raw SQL

24
docs/commands/status.md Normal file
View File

@ -0,0 +1,24 @@
# `status`
Shows local archive status.
## Usage
```bash
discrawl status
```
## Reports
- where the local database lives
- guild count and per-guild totals
- channel and thread counts
- message totals
- latest archived message time
- whether the Git share is configured and how stale the local import is
- embeddings status if `[search.embeddings]` is enabled
## See also
- [`doctor`](doctor.html) - liveness check (config, auth, DB, FTS wiring)
- [`report`](report.html) - Markdown activity block for the shared backup README

View File

@ -0,0 +1,48 @@
# `subscribe`
Subscribes to a Git-backed snapshot repo. The Git-only setup path - no Discord bot token required.
## Usage
```bash
discrawl subscribe https://github.com/example/discord-archive.git
discrawl subscribe --repo ~/.discrawl/share https://github.com/example/discord-archive.git
discrawl subscribe --branch main https://github.com/example/discord-archive.git
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
discrawl subscribe --no-import https://github.com/example/discord-archive.git
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
```
## What it does
- writes a config with `discord.token_source = "none"` (so no bot token is required)
- imports the latest snapshot into the local SQLite archive
- enables auto-refresh: read commands fetch and import when the local share import is older than `share.stale_after` (default `15m`)
## Flags
- `--repo <path>` - local snapshot repo path
- `--branch <name>` - snapshot branch (default: `main`)
- `--stale-after <duration>` - how stale the local import can get before read commands auto-refresh
- `--no-auto-update` - disable auto-refresh (use [`update`](update.html) manually)
- `--no-import` - write config only; skip the initial pull/import
- `--with-embeddings` - import vectors that match your local `[search.embeddings]` identity
## Disabled in this mode
`sync` and `tail` are disabled when `discord.token_source = "none"` because they need live Discord access. Switch to a token-equipped config to re-enable them.
## After subscribing
```bash
discrawl search "launch checklist"
discrawl messages --channel general --hours 24
discrawl status
```
## See also
- [Git snapshots guide](../guides/git-snapshots.html)
- [`publish`](publish.html)
- [`update`](update.html)

82
docs/commands/sync.md Normal file
View File

@ -0,0 +1,82 @@
# `sync`
Refreshes SQLite from one or both archive sources.
By default, `sync` does **not** import the Git snapshot first; it runs both live/local sources:
- Discord bot-token sync for bot-visible guild data
- local Discord Desktop cache import for classifiable cached messages and proven DMs
Use [`update`](update.html) when you want to pull/import the shared Git snapshot. Snapshot imports normally use changed-shard deltas, but unsafe table changes fall back to a full import. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` (only when stale) or `--update=force` (always). `--no-update` is accepted as an explicit no-op alias for the default.
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
## Usage
```bash
discrawl sync
discrawl sync --update=auto
discrawl sync --update=force
discrawl sync --no-update
discrawl sync --full
discrawl sync --full --all
discrawl sync --guild 123456789012345678
discrawl sync --guilds 123,456 --concurrency 8
discrawl sync --source both # default: bot API + desktop cache
discrawl sync --source discord # bot API only; aliases: key, bot, api
discrawl sync --source wiretap # desktop cache only; aliases: desktop, cache
discrawl sync --guild 123456789012345678 --all-channels
discrawl sync --channels 111,222 --since 2026-03-01T00:00:00Z
discrawl sync --with-embeddings
```
## Sources
| Source | Reads from | Stores |
| --- | --- | --- |
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
| `discord` / `key` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
| `wiretap` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
## Bot sync modes
| Command | Use when | Behavior |
| --- | --- | --- |
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored cursor |
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, then runs the routine live refresh |
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete |
## Flags
- `--source <both|discord|wiretap>` - which archive sources to read
- `--update <auto|force|none>` - whether to import the Git snapshot before live deltas
- `--full` - historical backfill (slow on large guilds)
- `--all-channels` - broader incremental sweep across every stored channel/thread
- `--latest-only` - explicit latest-only run (also the default for untargeted `sync`)
- `--all` - ignore `default_guild_id` and fan out across every discovered guild
- `--guild <id>` / `--guilds <id,id>` - target specific guilds
- `--channels <id,id>` - target specific channels (forum ids expand to threads)
- `--since <RFC3339>` - limit initial history and `--full` backfill to messages at or after this timestamp
- `--concurrency <n>` - override worker count (default auto-sized: floor 8, cap 32)
- `--skip-members` - refresh guild/channel/message data without crawling members
- `--with-embeddings` - also enqueue changed messages into `embedding_jobs`
## Notes
- `--latest-only` is the default for untargeted `sync`. Use `--all-channels` to opt out without doing a full historical crawl.
- `--since` does not mark older history as complete, so a later `sync --full` without `--since` can continue the backfill.
- Long runs emit periodic progress logs to stderr.
- Heartbeat logs (`message sync waiting`) name the oldest active channel and per-channel page activity if in-flight channels stop completing for a while.
- Every run ends with a `message sync finished` summary.
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried next sync.
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
- Full sync member refresh is best-effort; when the caller supplies no deadline, it gives up after five minutes.
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads.
## See also
- [Sync sources](../guides/sync-sources.html)
- [`tail`](tail.html)
- [`update`](update.html)

33
docs/commands/tail.md Normal file
View File

@ -0,0 +1,33 @@
# `tail`
Runs the live Discord Gateway tail and a periodic repair loop.
## Usage
```bash
discrawl tail
discrawl tail --guild 123456789012345678
discrawl tail --repair-every 30m
```
## What it does
- connects to the Discord Gateway with the configured bot token
- writes new messages, edits, and deletes into the local archive as they arrive
- periodically runs a repair pass to catch anything the live stream missed
## Flags
- `--guild <id>` / `--guilds <id,id>` - tail a specific guild scope (default: `default_guild_id`, or all discovered guilds if unset)
- `--repair-every <duration>` - frequency of the repair sweep
## Notes
- requires a working Discord bot token
- not available in Git-only mode (`discord.token_source = "none"`)
- terminates cleanly on SIGINT / SIGTERM and treats cancellation as normal exit
## See also
- [`sync`](sync.html)
- [Bot setup](../bot-setup.html)

47
docs/commands/tui.md Normal file
View File

@ -0,0 +1,47 @@
# `tui`
Opens the local terminal archive browser for stored messages.
## Usage
```bash
discrawl tui
discrawl tui --guild 123456789012345678 --channel general
discrawl tui --guilds 123,456 --author 1456464433768300635
discrawl tui --dm
discrawl --json tui --limit 50
```
## What it shows
The browser uses the shared crawlkit explorer:
- left pane: channel, person, or thread groups
- middle pane: newest matching message rows
- right pane: selected message detail, attachments, replies, and thread context
- footer: local DB or remote Git snapshot source
Mouse selection, right-click actions, sortable headers, refresh, and chat layout match the other crawlkit-backed archive tools.
## Flags
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
- `--dm` - browse local direct messages under the synthetic `@me` guild
- `--channel <id|name|#name>` - restrict to one channel or DM conversation
- `--author <id|name>` - restrict to one author
- `--limit <n>` - newest rows to load (default 200)
- `--include-empty` - include rows with no displayable/searchable content
- `--json` - print crawlkit browser rows as JSON instead of opening the TUI
## Notes
- `tui` is read-only.
- without `--guild`, `--guilds`, or `--dm`, it uses `default_guild_id` when configured; otherwise it can browse all stored guild rows
- `--dm` only shows messages imported from the local Discord Desktop cache by [`wiretap`](wiretap.html)
- `--json` is useful for launchers and agents that want the same row shape without an interactive terminal
## See also
- [`messages`](messages.html)
- [`dms`](dms.html)
- [`wiretap`](wiretap.html)

36
docs/commands/update.md Normal file
View File

@ -0,0 +1,36 @@
# `update`
Forces a Git snapshot pull and import.
Routine imports are delta-planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests. After a typical publish, only the changed tail shards are imported; unsafe table changes fall back to a full import.
## Usage
```bash
discrawl update
discrawl update --repo ~/.discrawl/share --remote https://github.com/example/discord-archive.git
discrawl update --with-embeddings
```
## Flags
- `--repo <path>` - local snapshot repo path (defaults to `[share].repo_path`)
- `--remote <url>` - target Git remote (defaults to `[share].remote`)
- `--branch <name>` - snapshot branch (defaults to `[share].branch`)
- `--with-embeddings` - also import vectors that match your local `[search.embeddings]` identity
## When to use it
- you have `share.remote` configured and want a fresh shard-delta import before running a command that does not auto-update (`sync` does not auto-import unless `--update=auto` is passed)
- you set `--no-auto-update` when subscribing and want to refresh on demand
- a CI job already imported the latest snapshot but read commands still consider it stale
## How `sync` interacts
`discrawl sync` does **not** auto-import the share unless `--update=auto` (only when stale) or `--update=force` (always) is passed. Routine live refreshes stay fast; explicit imports happen via `update`.
## See also
- [Git snapshots guide](../guides/git-snapshots.html)
- [`subscribe`](subscribe.html)
- [`sync`](sync.html)

47
docs/commands/wiretap.md Normal file
View File

@ -0,0 +1,47 @@
# `wiretap`
Imports classifiable Discord Desktop message payloads into the same local SQLite archive.
This is the path for searchable DMs because bot tokens cannot read personal direct messages.
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
## Usage
```bash
discrawl wiretap
discrawl wiretap --path "$HOME/Library/Application Support/discord"
discrawl wiretap --dry-run
discrawl wiretap --full-cache
discrawl wiretap --watch-every 2m
```
## Flags
- `--path <dir>` - override the desktop data directory (default: platform-specific Discord cache path)
- `--dry-run` - report what would be imported without writing anything
- `--full-cache` - exhaustive Chromium HTTP cache import for historical guild-cache archaeology (slower)
- `--watch-every <duration>` - keep importing on a periodic loop
- `--max-file-bytes <n>` - skip unusually large files (default 64 MiB)
## Notes
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
- stores proven DMs under the synthetic guild id `@me`
- `@me` rows stay local-only: they are never included in `publish` exports, Git snapshot imports, or embedding snapshots
- preserves existing local `@me` rows when importing a Git snapshot
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
- imports what Discord Desktop has cached locally, not complete live DM history
- scans local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON, plus route-bearing Chromium HTTP cache entries by default
- does not extract, store, or print Discord auth tokens
## Default desktop paths
- macOS: `~/Library/Application Support/discord`
- Linux: `~/.config/discord`
## See also
- [Wiretap guide](../guides/wiretap.html)
- [`dms`](dms.html)
- [`sync`](sync.html)

77
docs/configuration.md Normal file
View File

@ -0,0 +1,77 @@
# Configuration
`discrawl init` writes a complete config so most users do not hand-edit anything initially. This page documents the full shape and override rules for when you do.
## File layout
```toml
version = 1
default_guild_id = ""
guild_ids = []
db_path = "~/.discrawl/discrawl.db"
cache_dir = "~/.discrawl/cache"
log_dir = "~/.discrawl/logs"
[discord]
token_source = "env" # use "none" for Git-only read access
token_env = "DISCORD_BOT_TOKEN"
token_keyring_service = "discrawl"
token_keyring_account = "discord_bot_token"
[sync]
source = "both" # "discord" for bot-only sync, "wiretap" for desktop-cache-only import
concurrency = 16
repair_every = "6h"
full_history = true
attachment_text = true
[desktop]
path = "~/.config/discord" # macOS default: "~/Library/Application Support/discord"
max_file_bytes = 67108864
full_cache = false
[search]
default_mode = "fts"
[search.embeddings]
enabled = false
provider = "openai"
model = "text-embedding-3-small"
api_key_env = "OPENAI_API_KEY"
batch_size = 64
[share]
remote = ""
repo_path = "~/.discrawl/share"
branch = "main"
auto_update = true
stale_after = "15m"
```
`concurrency` is auto-sized at `init` to `min(32, max(8, GOMAXPROCS*2))`.
## Token resolution
In order:
1. `DISCORD_BOT_TOKEN`, or the env var named in `discord.token_env`
2. OS keyring item `discrawl` / `discord_bot_token`, or the configured keyring service/account
`discrawl` accepts either raw token text or a value prefixed with `Bot `. Normalization is automatic.
Set `discord.token_source = "keyring"` if you want to require keyring lookup and skip env entirely. Set it to `"none"` for a Git-only reader.
## Override rules
- `--config <path>` beats everything
- `DISCRAWL_CONFIG=<path>` overrides the default config path
- `discord.token_source = "none"` disables live Discord access for Git-only readers
- `discord.token_source = "keyring"` skips env lookup
- `DISCRAWL_NO_AUTO_UPDATE=1` disables Git snapshot auto-update for read commands in one process
## Notes
- `default_guild_id` is the implicit scope for `sync`, `tail`, `digest`, and `analytics` when `--guild` is not passed
- `guild_ids` is reserved for explicit multi-guild fan-out; usually you do not set this directly
- changing `[search.embeddings]` provider/model/input version retargets pending jobs and resets prior attempts; existing vectors for another identity remain in SQLite but are not used for semantic search
- changing `db_path` does not migrate existing data; copy the file yourself if you want to keep history

6
docs/contact.md Normal file
View File

@ -0,0 +1,6 @@
# Contact
Discord archive search and analysis tooling.
- Source: [github.com/openclaw/discrawl](https://github.com/openclaw/discrawl)
- Issues: [github.com/openclaw/discrawl/issues](https://github.com/openclaw/discrawl/issues)

View File

@ -0,0 +1,51 @@
# Data layout
Everything lives in one local SQLite file. Default path: `~/.discrawl/discrawl.db`.
## What is stored
- guild metadata
- channels and threads in one table (Discord models threads as channels)
- current member snapshot
- canonical message rows
- append-only message event records
- FTS5 index rows
- optional local embedding queue metadata and vectors
Messages imported from Discord Desktop use the same message, attachment, mention, and FTS paths as bot-synced messages.
## DMs
Proven DMs use the synthetic guild id `@me`. Unclassifiable desktop-cache payloads are skipped instead of being stored as unknown synthetic data.
## Attachments
Attachment binaries are not stored in SQLite. Only attachment metadata, filenames, and (optionally) extracted text are stored.
Set `sync.attachment_text = false` if you want to keep attachment metadata and filenames but disable attachment body fetches for text indexing.
## Multi-guild ready
The schema is multi-guild ready even when the common UX stays single-guild simple. Threads are stored as channels because that matches the Discord model. Archived threads are part of the sync surface.
## Schema migrations
SQLite schema migrations are versioned with `PRAGMA user_version`. Startup fails fast when the local DB schema is newer than the binary supports - that means your binary is older than the database.
## Querying directly
Run any query you want with read-only SQL:
```bash
discrawl sql 'select count(*) as messages from messages'
echo 'select guild_id, count(*) from messages group by guild_id' | discrawl sql -
```
See [`sql`](../commands/sql.html).
## See also
- [`status`](../commands/status.html) - high-level archive status
- [`channels`](../commands/channels.html) - channel directory
- [`members`](../commands/members.html) - member directory
- [Security](../security.html)

68
docs/guides/embeddings.md Normal file
View File

@ -0,0 +1,68 @@
# Embeddings
Embeddings are optional. FTS is the default search path and the primary verification target. Embeddings enrich recall in background batches; they do not block the hot sync path.
## Quick path
```bash
export OPENAI_API_KEY="..."
discrawl init --with-embeddings
discrawl sync --with-embeddings
discrawl embed --limit 1000
discrawl search --mode semantic "launch checklist"
discrawl search --mode hybrid "launch checklist"
```
## Two-phase pipeline
1. **Queue** - `sync --with-embeddings` writes `embedding_jobs` rows for new messages, changed normalized text, and messages without an existing job. The embedding provider is **not** called in this phase.
2. **Drain** - `discrawl embed` claims pending jobs with a short lock so overlapping runs do not process the same batch. It calls the configured provider, writes vectors to `message_embeddings` with provider, model, input version, dimensions, and binary vector data.
Behavior during drain:
- rate limits requeue the batch and stop that drain run cleanly
- provider or validation failures retry up to three attempts before marking the job failed
- messages with no normalized text are marked done and any stale vector for that message is removed
## Identity (provider, model, input version)
The identity is stored on each job and vector. If you change provider or model:
- pending jobs are retargeted to the new identity
- prior attempts are reset
- existing vectors for another identity remain in SQLite but are not used for semantic search
Use `--rebuild` when you want to regenerate vectors for the existing archive after a config change:
```bash
discrawl embed --rebuild --limit 1000
```
## Local provider example
```toml
[search.embeddings]
enabled = true
provider = "ollama"
model = "nomic-embed-text"
```
With local providers, message and query embedding both happen on the same machine. With remote providers, message text is sent during `discrawl embed`, and search query text is sent during `--mode semantic` or `--mode hybrid` calls.
## Git snapshot interaction
By default, `publish` does not export embeddings. Use `--with-embeddings`:
```bash
discrawl publish --with-embeddings --push
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
discrawl update --with-embeddings
```
The snapshot stores vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Only vectors for non-DM messages are exported. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors. `embedding_jobs` is never exported; subscribers that want fresh local vectors run `discrawl embed --rebuild`. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
## See also
- [Search modes](search-modes.html)
- [`embed`](../commands/embed.html)
- [Configuration](../configuration.html)

View File

@ -0,0 +1,84 @@
# Git-backed snapshots
Discrawl can publish the SQLite archive as sharded, compressed NDJSON snapshots in a private Git repo, then auto-import that repo before local read commands. This gives readers org memory without Discord credentials.
Snapshot packing/import and git mirror mechanics are shared through
`crawlkit`. Discrawl still owns Discord-specific privacy policy: `@me` direct
messages, wiretap sync state, and local-only desktop rows are excluded from
published snapshots and are preserved locally on import.
## Publisher
```bash
discrawl publish --remote https://github.com/example/discord-archive.git --push
discrawl publish --readme path/to/discord-backup/README.md --push
```
The publisher uses your existing bot-synced archive. It exports non-DM tables only.
## Subscriber
```bash
discrawl subscribe https://github.com/example/discord-archive.git
discrawl search "launch checklist"
discrawl messages --channel general --hours 24
```
`subscribe` is the Git-only setup path. It writes a config with `discord.token_source = "none"`, imports the snapshot, and does not require a Discord bot token. `sync` and `tail` remain disabled in this mode because they need live Discord access.
## Auto-update
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`):
```bash
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
```
`discrawl update` forces the same pull/import step manually. Snapshot imports are delta-planned from crawlkit shard fingerprints. Older manifests without those fields fall back to Git blob identity, so the common publish shape only imports the changed message tail shard plus small cursor tables. Unsafe table-shape changes still fall back to a full import.
`discrawl sync` does **not** auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
## Hybrid mode
Keep normal Discord credentials configured **and** set `share.remote`:
```bash
discrawl sync --update=auto # import snapshot delta first, then live deltas
discrawl messages --sync # blocking pre-query sync for matched scope
discrawl sync --all-channels # broader live repair
discrawl sync --full # historical backfill
```
## What is published
- non-DM archive tables (DM `@me` rows are always excluded)
- README activity block - latest update time, latest archived message, archive totals, day/week/month activity
- `embedding_jobs` is never exported
## Backing up vectors
```bash
discrawl publish --with-embeddings --push
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
discrawl update --with-embeddings
```
Vectors are stored under `embeddings/<provider>/<model>/<input_version>/...`. Import only restores matching identities; Ollama/nomic subscribers do not accidentally pick up OpenAI/text-embedding vectors. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
## CI
The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`:
```bash
DISCRAWL_DOCKER_TEST=1 go test ./internal/cli -run TestDockerGitSourceSmoke -count=1
```
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
## See also
- [`publish`](../commands/publish.html)
- [`subscribe`](../commands/subscribe.html)
- [`update`](../commands/update.html)
- [`report`](../commands/report.html)

View File

@ -0,0 +1,57 @@
# Search modes
`discrawl search` has three modes. FTS is the default and works with no embeddings.
## Modes
- **`fts`** (default) - searches the local SQLite FTS5 index, returns newest matching messages first
- **`semantic`** - embeds the query, scores against locally stored message vectors; errors out cleanly if embeddings are disabled or no compatible vectors exist
- **`hybrid`** - runs FTS and semantic, deduplicates by message id, falls back to FTS when semantic is unavailable
## FTS details
- backed by SQLite FTS5 with the default `unicode61` tokenizer
- user query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators
- punctuation still follows FTS5 tokenization rules
- by default, `search` skips rows with no searchable content (attachment text, attachment filenames, embeds, and replies still count as content); use `--include-empty` to opt back in
## Semantic and hybrid prerequisites
- `[search.embeddings]` configured in `~/.discrawl/config.toml`
- local `message_embeddings` rows for the configured provider, model, and input version
- input version is currently `message_normalized_v1`, so vectors are tied to normalized message text rather than raw Discord payloads
Two-phase embedding creation:
1. `discrawl sync --with-embeddings` queues changed messages by writing `embedding_jobs` rows. New messages, changed normalized text, and messages without an existing job are queued. This phase does not call the embedding provider.
2. `discrawl embed` drains pending jobs in bounded batches, calls the configured provider, and writes vectors to `message_embeddings`.
## Provider/model identity
The provider/model/input-version identity is stored on each job and vector. If you change provider or model, pending jobs are retargeted to the new identity and prior attempts are reset. Existing vectors for another identity remain in SQLite, but semantic search only reads vectors compatible with the current config.
Use `--rebuild` when changing provider, model, or input settings and you want to regenerate vectors for the existing archive.
## Local vs remote providers
Local providers like Ollama keep both message and query embedding on the same machine. With remote providers (OpenAI, etc.), message text is sent during `discrawl embed`, and search query text is sent when using `--mode semantic` or `--mode hybrid`. Stored message text is not sent during local vector scoring.
## Examples
```bash
discrawl search "panic: nil pointer"
discrawl search --mode fts "panic: nil pointer"
discrawl search --mode semantic "missing launch checklist"
discrawl search --mode hybrid "database timeout"
discrawl search --guild 123456789012345678 "payment failed"
discrawl search --dm "launch checklist"
discrawl search --channel billing --author steipete --limit 50 "invoice"
discrawl search --include-empty "GitHub"
discrawl --json search "websocket closed"
```
## See also
- [`search`](../commands/search.html)
- [`embed`](../commands/embed.html)
- [Embeddings](embeddings.html)

View File

@ -0,0 +1,57 @@
# Sync sources
Discrawl fills the local archive from two sources: the Discord bot API and the local Discord Desktop cache. Either or both can run in a single `sync`.
## Sources
| Source | Reads from | Stores |
| --- | --- | --- |
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
| `discord` / `key` / `bot` / `api` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
| `wiretap` / `desktop` / `cache` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
The default is `both`. Pick one with `--source` or by setting `[sync].source` in config.
## Bot sync modes
Sync modes control the Discord bot API side of a run. When `wiretap` is selected, the desktop cache import runs once alongside the chosen bot sync mode.
| Command | Use when | Behavior |
| --- | --- | --- |
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored latest cursor |
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, usually as a changed-shard delta, then runs the routine live refresh |
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
## Concurrency
`sync` already uses parallel channel workers for bot API message crawling. The default is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`. Override with `--concurrency`.
## Targeting
- `--guild <id>` runs only that guild
- `--guilds 123,456` runs an explicit set
- `--all` ignores `default_guild_id` and fans out across every discovered guild
- `--channels 111,222` targets specific channels (forum ids expand to their threads)
- `--since <RFC3339>` limits initial history and `--full` backfill to messages at or after the timestamp; older history is not marked complete, so a later `sync --full` without `--since` can continue the backfill
## Performance and resilience
- Long runs emit periodic progress logs to stderr.
- If in-flight channels stop completing for a while, `discrawl` emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters.
- Every run ends with a `message sync finished` summary.
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried on the next sync.
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
- Full sync member refresh is best-effort: when the caller does not supply a deadline, it gives up after five minutes, so message sync completion is not held hostage by a slow guild member crawl.
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
- If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
## See also
- [`sync`](../commands/sync.html)
- [`tail`](../commands/tail.html)
- [Wiretap](wiretap.html)
- [Git snapshots](git-snapshots.html)

61
docs/guides/wiretap.md Normal file
View File

@ -0,0 +1,61 @@
# Desktop wiretap
`wiretap` imports classifiable Discord Desktop message payloads into the same local SQLite archive used by bot sync. It is the path for searchable DMs because bot tokens cannot read personal direct messages.
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
## What it does
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
- stores proven DMs under the synthetic guild id `@me`
- preserves existing local `@me` guilds, channels, messages, and attachments when importing a Git snapshot, so a shared guild mirror refresh does not wipe local wiretap DM search
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
- imports what Discord Desktop has cached locally - not complete live DM history
## What it does not do
- does not extract, store, or print Discord auth tokens
- does not use a user token
- does not call the Discord API as your user
- does not run as a selfbot
## DM privacy: `@me` stays local
`@me` rows are local-only. They are excluded from:
- `publish` (Git snapshot output)
- `subscribe` / Git snapshot import
- `--with-embeddings` snapshot export
Excluded categories: DM guilds, channels, messages, events, attachments, mentions, wiretap sync state, and vectors for DM messages.
## What gets scanned
- local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON
- route-bearing Chromium HTTP cache entries by default
- `--full-cache` (or `desktop.full_cache = true`) enables exhaustive Chromium cache import for slower historical guild-cache archaeology
- `--max-file-bytes` skips unusually large files (default 64 MiB)
## Flags
```bash
discrawl wiretap
discrawl wiretap --path "$HOME/Library/Application Support/discord"
discrawl wiretap --dry-run
discrawl wiretap --full-cache
discrawl wiretap --watch-every 2m
```
`--watch-every` keeps the import running on a periodic loop. `--dry-run` reports what would be imported without writing anything.
## Default desktop paths
- macOS: `~/Library/Application Support/discord`
- Linux: `~/.config/discord`
- override via `--path` or `[desktop].path`
## See also
- [`wiretap`](../commands/wiretap.html)
- [`dms`](../commands/dms.html) - convenience layer over `@me`
- [Sync sources](sync-sources.html)

13
docs/index.html Normal file
View File

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="refresh" content="0; url=README.html">
<link rel="canonical" href="README.html">
<title>Discrawl docs</title>
</head>
<body>
<p><a href="README.html">Discrawl docs</a></p>
</body>
</html>

66
docs/install.md Normal file
View File

@ -0,0 +1,66 @@
# Install
Discrawl is a single Go binary. Install via Homebrew or build from source.
## Homebrew
```bash
brew install steipete/tap/discrawl
discrawl --version
```
Homebrew adds the `steipete/tap` tap automatically when you install the formula by its fully qualified name.
## From source
Requires Go `1.26+`.
```bash
git clone https://github.com/openclaw/discrawl.git
cd discrawl
go build -o bin/discrawl ./cmd/discrawl
./bin/discrawl --version
```
If you do not put `discrawl` on `PATH`, replace `discrawl` with `./bin/discrawl` in any example below.
## Quick start (with bot token)
```bash
export DISCORD_BOT_TOKEN="your-bot-token"
discrawl init
discrawl doctor
discrawl sync --full
discrawl sync
discrawl search "panic: nil pointer"
discrawl tail
```
`init` discovers accessible guilds and writes `~/.discrawl/config.toml`. If exactly one guild is available, it becomes the default automatically.
`doctor` verifies the config loads, the token resolves, the bot can reach the Gateway, and the local DB and FTS index are wired up.
## Quick start (Git-only reader)
No Discord credentials required. You read a private Git snapshot another machine published.
```bash
discrawl subscribe https://github.com/example/discord-archive.git
discrawl search "launch checklist"
discrawl messages --channel general --hours 24
```
`subscribe` writes a token-free config (`discord.token_source = "none"`) and imports the snapshot. Read commands auto-refresh when the local snapshot is older than `15m`.
## Default runtime paths
- config: `~/.discrawl/config.toml`
- database: `~/.discrawl/discrawl.db`
- cache: `~/.discrawl/cache/`
- logs: `~/.discrawl/logs/`
## Next steps
- [Bot setup](bot-setup.html) - intents, permissions, token sources
- [Configuration](configuration.html) - the full TOML shape and override rules
- [`sync`](commands/sync.html) - the main archive command

49
docs/security.md Normal file
View File

@ -0,0 +1,49 @@
# Security
## Tokens and credentials
- Do not commit bot tokens or API keys.
- Default config lives in your home directory, not inside the repo.
- Prefer env vars or the OS keyring for bot tokens.
- `discrawl doctor` reports the token source (env or keyring), not token contents.
## Wiretap is local-only
`wiretap` reads local Discord Desktop cache files only. It does not:
- extract, store, or print Discord auth tokens
- use a user token
- call the Discord API as your user
- run as a selfbot
Wiretap DM messages stay local. They are stored under the synthetic guild id `@me` and are always excluded from:
- `publish` (Git snapshot output)
- `subscribe` / Git snapshot import
- the optional `--with-embeddings` snapshot export
A shared guild mirror refresh does not wipe local wiretap DM search either - import preserves existing local `@me` guilds, channels, messages, and attachments.
## CI
CI runs secret scanning with `gitleaks` against git history and the working tree.
## What is stored locally
- guild metadata
- channels and threads (one table)
- current member snapshot
- canonical message rows
- append-only message event records
- FTS index rows
- optional local embedding queue metadata and vectors
Attachment binaries are not stored in SQLite. Only attachment metadata, filenames, and (optionally) extracted text are stored.
Set `sync.attachment_text = false` if you want to keep attachment metadata and filenames but disable attachment body fetches for text indexing.
## What is sent over the wire
With remote embedding providers, message text is sent during `discrawl embed`, and search query text is sent when using `--mode semantic` or `--mode hybrid`. Stored message text is not sent during local vector scoring.
Local providers like Ollama keep both message and query embedding on the same machine.

BIN
docs/social-card.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

79
docs/social-card.svg Normal file
View File

@ -0,0 +1,79 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630" viewBox="0 0 1200 630" role="img" aria-labelledby="title desc">
<title id="title">Discrawl social card</title>
<desc id="desc">Discrawl mirrors Discord into SQLite for local search and analysis.</desc>
<defs>
<linearGradient id="bg" x1="0" y1="0" x2="1" y2="1">
<stop offset="0" stop-color="#0b0f16"/>
<stop offset="0.58" stop-color="#111723"/>
<stop offset="1" stop-color="#151827"/>
</linearGradient>
<linearGradient id="accent" x1="0" y1="0" x2="1" y2="0">
<stop offset="0" stop-color="#5fe3d4"/>
<stop offset="0.56" stop-color="#a594ff"/>
<stop offset="1" stop-color="#f364a2"/>
</linearGradient>
<linearGradient id="terminal" x1="0" y1="0" x2="0" y2="1">
<stop offset="0" stop-color="#161d2a"/>
<stop offset="1" stop-color="#0f141d"/>
</linearGradient>
<filter id="shadow" x="-10%" y="-15%" width="120%" height="130%">
<feDropShadow dx="0" dy="22" stdDeviation="22" flood-color="#000000" flood-opacity="0.45"/>
</filter>
<filter id="softGlow" x="-40%" y="-40%" width="180%" height="180%">
<feGaussianBlur stdDeviation="36"/>
</filter>
</defs>
<rect width="1200" height="630" fill="url(#bg)"/>
<circle cx="1030" cy="92" r="210" fill="#5fe3d4" opacity="0.11" filter="url(#softGlow)"/>
<circle cx="104" cy="568" r="240" fill="#f364a2" opacity="0.10" filter="url(#softGlow)"/>
<path d="M0 515 C190 438 330 548 512 472 S874 330 1200 410 L1200 630 L0 630 Z" fill="#0a0d13" opacity="0.55"/>
<path d="M0 534 C206 456 338 570 520 492 S884 360 1200 438" fill="none" stroke="url(#accent)" stroke-width="3" opacity="0.44"/>
<g transform="translate(72 70)">
<rect x="0" y="0" width="112" height="112" rx="22" fill="#0c0f14" stroke="#253244" stroke-width="2"/>
<rect x="23" y="28" width="66" height="47" rx="5" fill="none" stroke="#5fe3d4" stroke-width="4"/>
<line x1="23" y1="43" x2="89" y2="43" stroke="#5fe3d4" stroke-width="3"/>
<circle cx="33" cy="36" r="2.8" fill="#f364a2"/>
<circle cx="43" cy="36" r="2.8" fill="#f7c177"/>
<circle cx="53" cy="36" r="2.8" fill="#5fe3d4"/>
<text x="31" y="59" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="10" font-weight="800" fill="#5fe3d4">SELECT</text>
<text x="31" y="71" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="10" font-weight="800" fill="#aab3c1">msgs</text>
<rect x="23" y="84" width="66" height="6" rx="3" fill="#161b24"/>
<rect x="23" y="84" width="42" height="6" rx="3" fill="#5fe3d4"/>
</g>
<text x="205" y="126" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="28" font-weight="800" letter-spacing="2" fill="#5fe3d4">discrawl.sh</text>
<text x="72" y="248" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="96" font-weight="800" letter-spacing="-3" fill="#edf4fb">Discord history,</text>
<text x="72" y="346" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="96" font-weight="800" letter-spacing="-3" fill="#edf4fb">local answers.</text>
<text x="74" y="410" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="30" font-weight="560" fill="#aab3c1">Mirror Discord into SQLite.</text>
<text x="74" y="450" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="30" font-weight="560" fill="#aab3c1">Search, query, tail, and analyze locally.</text>
<g transform="translate(72 505)">
<rect x="0" y="0" width="210" height="54" rx="10" fill="#5fe3d4"/>
<text x="28" y="35" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="900" fill="#081016">discrawl sync</text>
<rect x="230" y="0" width="228" height="54" rx="10" fill="#151d29" stroke="#263448" stroke-width="2"/>
<text x="258" y="35" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#f364a2">discrawl search</text>
</g>
<g transform="translate(742 135)" filter="url(#shadow)">
<rect x="0" y="0" width="386" height="330" rx="20" fill="url(#terminal)" stroke="#263448" stroke-width="2"/>
<rect x="0" y="0" width="386" height="54" rx="20" fill="#121925"/>
<path d="M0 34 Q0 0 34 0 H352 Q386 0 386 34 V54 H0 Z" fill="#121925"/>
<circle cx="30" cy="27" r="7" fill="#f364a2"/>
<circle cx="54" cy="27" r="7" fill="#f7c177"/>
<circle cx="78" cy="27" r="7" fill="#5fe3d4"/>
<text x="112" y="34" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="16" font-weight="800" fill="#657287">sqlite archive</text>
<text x="28" y="95" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#5fe3d4">$ discrawl wiretap</text>
<text x="28" y="132" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#6f7b8d">dm cache imported: 814</text>
<text x="28" y="180" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#5fe3d4">$ discrawl sql</text>
<text x="28" y="218" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">312k messages</text>
<text x="28" y="255" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">14k attachments</text>
<text x="28" y="292" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">FTS5 ready</text>
<rect x="286" y="260" width="72" height="10" rx="5" fill="#263448"/>
<rect x="312" y="282" width="46" height="10" rx="5" fill="#263448"/>
<rect x="298" y="304" width="60" height="10" rx="5" fill="#263448"/>
</g>
<text x="72" y="600" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="800" fill="#657287">bot sync + desktop wiretap + FTS5 + semantic search</text>
</svg>

After

Width:  |  Height:  |  Size: 5.9 KiB

48
go.mod
View File

@ -1,28 +1,58 @@
module github.com/steipete/discrawl
module github.com/openclaw/discrawl
go 1.26.2
go 1.26.3
require (
github.com/bwmarrin/discordgo v0.29.0
github.com/gorilla/websocket v1.5.3
github.com/pelletier/go-toml/v2 v2.3.0
github.com/stretchr/testify v1.11.1
golang.org/x/text v0.35.0
modernc.org/sqlite v1.49.1
github.com/zalando/go-keyring v0.2.8
golang.org/x/sys v0.43.0
golang.org/x/text v0.36.0
)
require (
github.com/charmbracelet/bubbles v1.0.0 // indirect
github.com/clipperhouse/displaywidth v0.11.0 // indirect
github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
modernc.org/sqlite v1.50.0 // indirect
)
require (
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/bubbletea v1.3.10 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
github.com/charmbracelet/lipgloss v1.1.0 // indirect
github.com/charmbracelet/x/ansi v0.11.7 // indirect
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
github.com/danieljoos/wincred v1.2.3 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/godbus/dbus/v5 v5.2.2 // indirect
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.22 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.23 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
github.com/ncruces/go-strftime v1.0.0 // indirect
github.com/openclaw/crawlkit v0.5.0
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/crypto v0.49.0 // indirect
golang.org/x/sys v0.42.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/crypto v0.50.0 // indirect
golang.org/x/tools v0.44.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
modernc.org/libc v1.72.0 // indirect
modernc.org/libc v1.72.1 // indirect
modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect
)

118
go.sum
View File

@ -1,11 +1,38 @@
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 h1:EwtI+Al+DeppwYX2oXJCETMO23COyaKGP6fHVpkpWpg=
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
@ -13,45 +40,80 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw=
github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/openclaw/crawlkit v0.5.0 h1:sVqIbQ5v6LiOf+NXcVj93UhfoaJqMbBlrd1lU6uhO9M=
github.com/openclaw/crawlkit v0.5.0/go.mod h1:/AI8o/DeRqXPZJPHq/9mGUjNzLPskm/wTjikRPxEdHY=
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
github.com/zalando/go-keyring v0.2.8/go.mod h1:tsMo+VpRq5NGyKfxoBVjCuMrG47yj8cmakZDO5QGii0=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c=
golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
modernc.org/cc/v4 v4.28.1 h1:XpLbkYVQ24E8tX5u8+yWGvaxerxkR/S4zqxI8ZoSBuc=
modernc.org/cc/v4 v4.28.1/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
modernc.org/ccgo/v4 v4.33.0 h1:dspBCm75jsj8Y/ufwAMVfe375L2iYdMyQ2QG/v3hL54=
modernc.org/ccgo/v4 v4.33.0/go.mod h1:+RhXBoRYzRwaH21mV/aj6XvQRDtfjcZfAlPMsQo8CR0=
modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
@ -60,18 +122,18 @@ modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
modernc.org/libc v1.72.1 h1:db1xwJ6u1kE3KHTFTTbe2GCrczHPKzlURP0aDC4NGD0=
modernc.org/libc v1.72.1/go.mod h1:HRMiC/PhPGLIPM7GzAFCbI+oSgE3dhZ8FWftmRrHVlY=
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
modernc.org/sqlite v1.49.1 h1:dYGHTKcX1sJ+EQDnUzvz4TJ5GbuvhNJa8Fg6ElGx73U=
modernc.org/sqlite v1.49.1/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM=
modernc.org/sqlite v1.50.0/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=

View File

@ -1,27 +1,42 @@
package cli
import (
"context"
"errors"
"flag"
"fmt"
"io"
"log/slog"
"os"
"os/signal"
"strings"
"syscall"
"time"
"github.com/steipete/discrawl/internal/config"
"github.com/steipete/discrawl/internal/discord"
"github.com/steipete/discrawl/internal/embed"
"github.com/steipete/discrawl/internal/store"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/crawlkit/embed"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/discord"
"github.com/openclaw/discrawl/internal/discorddesktop"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
// syncSources records which sync backends a --source value selects.
// It is produced by parseSyncSources and consumed by runSyncLocked.
type syncSources struct {
// name is the canonical label for the selection; parseSyncSources sets it
// to "both", "discord", or "wiretap".
name string
// discord enables the Discord sync phase (r.syncer.Sync) in runSyncLocked.
discord bool
// wiretap enables the desktop import phase (discorddesktop.Import) in
// runSyncLocked.
wiretap bool
}
// syncRunStats is the combined result runSyncLocked prints when a run is not
// limited to exactly one source; single-source runs print that source's stats
// value directly instead.
type syncRunStats struct {
// Source is the canonical source label (syncSources.name).
Source string `json:"source"`
// Discord holds the stats of the Discord sync phase; it is nil, and omitted
// from the JSON output, when that phase did not run.
Discord *syncer.SyncStats `json:"discord,omitempty"`
// Wiretap holds the stats of the desktop import phase; it is nil, and
// omitted from the JSON output, when that phase did not run.
Wiretap *discorddesktop.Stats `json:"wiretap,omitempty"`
}
func (r *runtime) runInit(args []string) error {
fs := flag.NewFlagSet("init", flag.ContinueOnError)
fs.SetOutput(io.Discard)
fromOpenClaw := fs.String("from-openclaw", "", "")
account := fs.String("account", "", "")
guildID := fs.String("guild", "", "")
dbPath := fs.String("db", "", "")
withEmbeddings := fs.Bool("with-embeddings", false, "")
@ -29,12 +44,6 @@ func (r *runtime) runInit(args []string) error {
return usageErr(err)
}
cfg := config.Default()
if *fromOpenClaw != "" {
cfg.Discord.OpenClawConfig = *fromOpenClaw
}
if *account != "" {
cfg.Discord.Account = *account
}
if *dbPath != "" {
cfg.DBPath = *dbPath
}
@ -74,10 +83,6 @@ func (r *runtime) runInit(args []string) error {
}
if *guildID != "" {
cfg.DefaultGuildID = *guildID
} else if info, err := config.LoadOpenClawDiscord(cfg.Discord.OpenClawConfig, cfg.Discord.Account); err == nil {
if len(info.GuildIDs) == 1 {
cfg.DefaultGuildID = info.GuildIDs[0]
}
}
if cfg.DefaultGuildID == "" && len(cfg.GuildIDs) == 1 {
cfg.DefaultGuildID = cfg.GuildIDs[0]
@ -103,14 +108,29 @@ func (r *runtime) runSync(args []string) error {
since := fs.String("since", "", "")
channels := fs.String("channels", "", "")
concurrency := fs.Int("concurrency", r.cfg.Sync.Concurrency, "")
source := fs.String("source", r.cfg.Sync.Source, "")
withEmbeddings := fs.Bool("with-embeddings", false, "")
skipMembers := fs.Bool("skip-members", false, "")
latestOnly := fs.Bool("latest-only", false, "")
guildsFlag := fs.String("guilds", "", "")
guildFlag := fs.String("guild", "", "")
updateMode := fs.String("update", "", "")
noUpdate := fs.Bool("no-update", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if *noUpdate && strings.TrimSpace(*updateMode) != "" && !strings.EqualFold(strings.TrimSpace(*updateMode), string(shareUpdateNever)) {
return usageErr(errors.New("use either --no-update or --update, not both"))
}
if strings.TrimSpace(*updateMode) != "" {
if _, err := parseShareUpdateMode(*updateMode); err != nil {
return usageErr(err)
}
}
sources, err := parseSyncSources(*source)
if err != nil {
return usageErr(err)
}
var sinceTime time.Time
if *since != "" {
parsed, err := time.Parse(time.RFC3339, *since)
@ -123,8 +143,7 @@ func (r *runtime) runSync(args []string) error {
if err != nil {
return usageErr(err)
}
defaultLatest := !*full && !*allChannels && *since == "" && *channels == ""
latestMode := *latestOnly || defaultLatest
defaultLatest := defaultLatestSyncMode(*full, *allChannels, *since, *channels)
opts := syncer.SyncOptions{
Full: *full,
GuildIDs: guildIDs,
@ -132,14 +151,98 @@ func (r *runtime) runSync(args []string) error {
Concurrency: *concurrency,
Since: sinceTime,
Embeddings: *withEmbeddings,
SkipMembers: *skipMembers || defaultLatest,
LatestOnly: latestMode,
SkipMembers: syncSkipsMembers(*skipMembers, defaultLatest),
LatestOnly: syncLatestOnly(*latestOnly, defaultLatest),
}
stats, err := r.syncer.Sync(r.ctx, opts)
if err != nil {
return err
return r.withSyncLock(func() error {
return r.runSyncLocked(sources, opts)
})
}
// runSyncLocked runs the sync phases selected by sources and prints the
// resulting stats. runSync invokes it through r.withSyncLock.
//
// The Discord phase runs first (when sources.discord is set), followed by the
// desktop import (when sources.wiretap is set). The first error from either
// phase aborts the run and is returned unchanged.
//
// Output shape: a run limited to a single source prints that source's stats
// value directly; any other combination prints a syncRunStats wrapper.
func (r *runtime) runSyncLocked(sources syncSources, opts syncer.SyncOptions) error {
	var apiStats *syncer.SyncStats
	if sources.discord {
		r.setSyncLockPhase("discord sync")
		// Only close a client that did not exist before this call; a client
		// that was already present is left open for its owner.
		// NOTE(review): presumably ensureDiscordServices populates r.client — confirm.
		shouldClose := r.client == nil
		if err := r.ensureDiscordServices(); err != nil {
			return err
		}
		if shouldClose && r.client != nil {
			// Close errors are deliberately ignored: the sync result is
			// already determined by the time the deferred close runs.
			defer func() { _ = r.client.Close() }()
		}
		stats, err := r.syncer.Sync(r.ctx, opts)
		if err != nil {
			return err
		}
		apiStats = &stats
	}

	var wiretapStats *discorddesktop.Stats
	if sources.wiretap {
		r.setSyncLockPhase("wiretap import")
		stats, err := discorddesktop.Import(r.ctx, r.store, discorddesktop.Options{
			Path:         r.cfg.Desktop.Path,
			MaxFileBytes: r.cfg.Desktop.MaxFileBytes,
			FullCache:    r.cfg.Desktop.FullCache,
			Now:          r.now,
		})
		if err != nil {
			return err
		}
		wiretapStats = &stats
	}

	switch {
	case sources.discord && !sources.wiretap:
		return r.print(*apiStats)
	case sources.wiretap && !sources.discord:
		return r.print(*wiretapStats)
	}
	return r.print(syncRunStats{Source: sources.name, Discord: apiStats, Wiretap: wiretapStats})
}
// defaultLatestSyncMode reports whether a sync request carries no explicit
// scope at all: neither full nor allChannels is set, and both since and
// channels are empty strings.
func defaultLatestSyncMode(full bool, allChannels bool, since string, channels string) bool {
	if full || allChannels {
		return false
	}
	return since == "" && channels == ""
}
// syncLatestOnly reports whether the sync should run in latest-only mode:
// either it was requested explicitly or the default-latest mode applies.
func syncLatestOnly(explicit bool, defaultLatest bool) bool {
	if explicit {
		return true
	}
	return defaultLatest
}
// syncSkipsMembers reports whether the member sync should be skipped: either
// skipping was requested explicitly or the default-latest mode applies.
func syncSkipsMembers(explicit bool, defaultLatest bool) bool {
	if explicit {
		return true
	}
	return defaultLatest
}
// parseSyncSources turns a --source value into the set of sync backends.
//
// Matching is case-insensitive and ignores surrounding whitespace. Several
// values may be combined with "," or "+". Accepted spellings:
//
//   - both, all                → discord and wiretap
//   - discord, api, bot, key   → discord
//   - wiretap, desktop, cache  → wiretap
//
// An empty value, and any empty segment between separators, also selects both
// backends. The returned name is always the canonical "both", "discord", or
// "wiretap". Any other segment yields an error that quotes the raw input.
func parseSyncSources(raw string) (syncSources, error) {
	spec := strings.ToLower(strings.TrimSpace(raw))
	if spec == "" {
		spec = "both"
	}

	var picked syncSources
	// "+" is treated as an alias for the "," separator.
	for _, token := range strings.Split(strings.ReplaceAll(spec, "+", ","), ",") {
		switch strings.TrimSpace(token) {
		case "discord", "api", "bot", "key":
			picked.discord = true
		case "wiretap", "desktop", "cache":
			picked.wiretap = true
		case "", "both", "all":
			picked.discord, picked.wiretap = true, true
		default:
			return syncSources{}, fmt.Errorf("invalid --source %q; use both, discord, or wiretap", raw)
		}
	}

	if !picked.discord && !picked.wiretap {
		return syncSources{}, fmt.Errorf("invalid --source %q; use both, discord, or wiretap", raw)
	}
	if picked.discord && picked.wiretap {
		picked.name = "both"
	} else if picked.discord {
		picked.name = "discord"
	} else {
		picked.name = "wiretap"
	}
	return picked, nil
}
func (r *runtime) runTail(args []string) error {
@ -156,17 +259,93 @@ func (r *runtime) runTail(args []string) error {
return r.syncer.RunTail(ctx, r.resolveSyncGuilds(*guildFlag, *guildsFlag), *repairEvery)
}
// runWiretap implements the "wiretap" command: it imports data through
// discorddesktop.Import into r.store and prints the resulting stats.
//
// Flags (defaults for the first three come from r.cfg.Desktop):
//
//	--path            value passed as Options.Path
//	--max-file-bytes  value passed as Options.MaxFileBytes; must be positive
//	--full-cache      value passed as Options.FullCache
//	--dry-run         value passed as Options.DryRun
//	--watch-every     repeat interval; zero or negative means a single import
//
// With a watch interval of at least one second the import runs once
// immediately and again on every tick, until r.ctx is cancelled or the process
// receives an interrupt or SIGTERM, at which point nil is returned. An import
// error ends the loop and is returned. Flag problems are reported as usage
// errors.
func (r *runtime) runWiretap(args []string) error {
	var (
		cachePath  string
		sizeLimit  int64
		wholeCache bool
		preview    bool
		interval   time.Duration
	)
	flags := flag.NewFlagSet("wiretap", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	flags.StringVar(&cachePath, "path", r.cfg.Desktop.Path, "")
	flags.Int64Var(&sizeLimit, "max-file-bytes", r.cfg.Desktop.MaxFileBytes, "")
	flags.BoolVar(&wholeCache, "full-cache", r.cfg.Desktop.FullCache, "")
	flags.BoolVar(&preview, "dry-run", false, "")
	flags.DurationVar(&interval, "watch-every", 0, "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("wiretap takes flags only"))
	}
	if sizeLimit <= 0 {
		return usageErr(errors.New("--max-file-bytes must be positive"))
	}

	// importOnce performs a single import pass and prints its stats.
	importOnce := func(ctx context.Context) error {
		stats, err := discorddesktop.Import(ctx, r.store, discorddesktop.Options{
			Path:         cachePath,
			MaxFileBytes: sizeLimit,
			FullCache:    wholeCache,
			DryRun:       preview,
			Now:          r.now,
		})
		if err != nil {
			return err
		}
		return r.print(stats)
	}

	switch {
	case interval <= 0:
		// No watch requested: one pass on the runtime context.
		return importOnce(r.ctx)
	case interval < time.Second:
		return usageErr(errors.New("--watch-every must be at least 1s"))
	}

	watchCtx, cancel := signal.NotifyContext(r.ctx, os.Interrupt, syscall.SIGTERM)
	defer cancel()
	// First pass runs right away instead of waiting for the first tick.
	if err := importOnce(watchCtx); err != nil {
		return err
	}
	tick := time.NewTicker(interval)
	defer tick.Stop()
	for {
		select {
		case <-watchCtx.Done():
			return nil
		case <-tick.C:
			if err := importOnce(watchCtx); err != nil {
				return err
			}
		}
	}
}
func (r *runtime) runStatus(args []string) error {
if len(args) != 0 {
return usageErr(fmt.Errorf("status takes no arguments"))
fs := flag.NewFlagSet("status", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("status takes no arguments"))
}
if *jsonOut {
r.json = true
}
dbPath, err := config.ExpandPath(r.cfg.DBPath)
if err != nil {
return configErr(err)
}
status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()}
if r.store != nil {
status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
if err != nil {
return err
}
}
if r.json {
needsUpdate := false
if r.store != nil && r.cfg.ShareEnabled() {
if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil {
needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter)
}
}
return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate))
}
return r.print(status)
}
@ -181,21 +360,21 @@ func (r *runtime) runEmbed(args []string) error {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("embed takes no positional arguments"))
return usageErr(errors.New("embed takes no positional arguments"))
}
if *limit <= 0 {
return usageErr(fmt.Errorf("--limit must be positive"))
return usageErr(errors.New("--limit must be positive"))
}
if *batchSize <= 0 {
return usageErr(fmt.Errorf("--batch-size must be positive"))
return usageErr(errors.New("--batch-size must be positive"))
}
if !r.cfg.Search.Embeddings.Enabled {
return usageErr(fmt.Errorf("embeddings are disabled in config"))
return usageErr(errors.New("embeddings are disabled in config"))
}
providerFactory := r.newEmbed
if providerFactory == nil {
providerFactory = func(cfg config.EmbeddingsConfig) (embed.Provider, error) {
return embed.NewProvider(cfg)
return embed.NewProvider(crawlkitEmbeddingConfig(cfg))
}
}
provider, err := providerFactory(r.cfg.Search.Embeddings)
@ -227,8 +406,17 @@ func (r *runtime) runEmbed(args []string) error {
}
func (r *runtime) runDoctor(args []string) error {
if len(args) != 0 {
return usageErr(fmt.Errorf("doctor takes no arguments"))
fs := flag.NewFlagSet("doctor", flag.ContinueOnError)
fs.SetOutput(io.Discard)
jsonOut := fs.Bool("json", false, "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(errors.New("doctor takes no arguments"))
}
if *jsonOut {
r.json = true
}
report := map[string]any{
"config_path": r.configPath,
@ -247,7 +435,7 @@ func (r *runtime) runDoctor(args []string) error {
report["share_stale_after"] = cfg.Share.StaleAfter
}
if cfg.Search.Embeddings.Enabled {
check := embed.CheckProvider(r.ctx, cfg.Search.Embeddings)
check := embed.CheckProvider(r.ctx, crawlkitEmbeddingConfig(cfg.Search.Embeddings))
report["embeddings"] = check.Status
report["embeddings_provider"] = check.Provider
report["embeddings_model"] = check.Model

104
internal/cli/analytics.go Normal file
View File

@ -0,0 +1,104 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"strings"
"github.com/openclaw/discrawl/internal/report"
)
// runAnalytics dispatches "analytics <subcommand>".
//
// With no arguments it prints the analytics usage text to r.stdout and
// succeeds. The "quiet" and "trends" subcommands run inside
// r.withLocalStoreRead(true, ...) and receive the remaining arguments. Any
// other subcommand name is a usage error.
func (r *runtime) runAnalytics(args []string) error {
	if len(args) == 0 {
		printAnalyticsUsage(r.stdout)
		return nil
	}

	name, rest := strings.TrimSpace(args[0]), args[1:]
	var handler func([]string) error
	switch name {
	case "quiet":
		handler = r.runAnalyticsQuiet
	case "trends":
		handler = r.runAnalyticsTrends
	default:
		return usageErr(fmt.Errorf("unknown analytics subcommand %q", name))
	}
	return r.withLocalStoreRead(true, func() error {
		return handler(rest)
	})
}
// printAnalyticsUsage writes the help text for the analytics command to w,
// one line per write. Write errors are ignored: the text is best-effort help
// output.
func printAnalyticsUsage(w io.Writer) {
	usage := [...]string{
		"Usage: discrawl analytics <subcommand> [flags]",
		"",
		"Subcommands:",
		" quiet Channels with no activity in the lookback window.",
		" trends Week-over-week message counts per channel.",
	}
	for _, line := range usage {
		_, _ = fmt.Fprintln(w, line)
	}
}
// runAnalyticsQuiet implements "analytics quiet": it builds the quiet-channel
// report via report.BuildQuiet and prints it.
//
// Flags:
//
//	--since  lookback window, parsed by parseLookback (default "30d")
//	--guild  guild ID; when blank, the configured effective default is used
//
// Unknown flags, positional arguments, and an unparsable --since value are
// usage errors; a report-building error is returned unchanged.
func (r *runtime) runAnalyticsQuiet(args []string) error {
	var sinceRaw, guildRaw string
	flags := flag.NewFlagSet("analytics quiet", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	flags.StringVar(&sinceRaw, "since", "30d", "")
	flags.StringVar(&guildRaw, "guild", "", "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("analytics quiet takes no positional arguments"))
	}

	window, err := parseLookback(sinceRaw)
	if err != nil {
		return usageErr(fmt.Errorf("parse --since: %w", err))
	}

	target := strings.TrimSpace(guildRaw)
	if target == "" {
		target = r.cfg.EffectiveDefaultGuildID()
	}

	result, err := report.BuildQuiet(r.ctx, r.store, report.QuietOptions{
		Since:   window,
		GuildID: target,
	})
	if err != nil {
		return err
	}
	return r.print(result)
}
// runAnalyticsTrends implements "analytics trends": it builds the per-channel
// trend report via report.BuildTrends and prints it.
//
// Flags:
//
//	--weeks    number of weeks passed to the report (default 8); must not be negative
//	--guild    guild ID; when blank, the configured effective default is used
//	--channel  passed through to the report as given
//
// Unknown flags, positional arguments, and a negative --weeks are usage
// errors; a report-building error is returned unchanged.
func (r *runtime) runAnalyticsTrends(args []string) error {
	var (
		weekCount  int
		guildRaw   string
		channelRaw string
	)
	flags := flag.NewFlagSet("analytics trends", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	flags.IntVar(&weekCount, "weeks", 8, "")
	flags.StringVar(&guildRaw, "guild", "", "")
	flags.StringVar(&channelRaw, "channel", "", "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() != 0 {
		return usageErr(errors.New("analytics trends takes no positional arguments"))
	}
	if weekCount < 0 {
		return usageErr(errors.New("--weeks must be zero or greater"))
	}

	target := strings.TrimSpace(guildRaw)
	if target == "" {
		target = r.cfg.EffectiveDefaultGuildID()
	}

	result, err := report.BuildTrends(r.ctx, r.store, report.TrendsOptions{
		Weeks:   weekCount,
		GuildID: target,
		Channel: channelRaw,
	})
	if err != nil {
		return err
	}
	return r.print(result)
}

View File

@ -0,0 +1,216 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
// TestAnalyticsCommand drives the analytics command end to end through Run
// against a freshly seeded store. It covers the usage text, the JSON schema
// and the human and plain renderings of both subcommands, and the usage-error
// exit code (2) for unknown subcommands and invalid flags.
func TestAnalyticsCommand(t *testing.T) {
	ctx := context.Background()
	dir := t.TempDir()
	cfgPath := filepath.Join(dir, "config.toml")
	dbPath := filepath.Join(dir, "discrawl.db")
	require.NoError(t, seedAnalyticsCLIStore(ctx, dbPath))

	cfg := config.Default()
	cfg.DBPath = dbPath
	cfg.DefaultGuildID = "g1"
	require.NoError(t, config.Write(cfgPath, cfg))

	t.Run("analytics with no subcommand prints usage", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics"}, &out, &bytes.Buffer{}))
		require.Contains(t, out.String(), "Usage: discrawl analytics <subcommand> [flags]")
		require.Contains(t, out.String(), "quiet")
		require.Contains(t, out.String(), "trends")
	})

	t.Run("analytics quiet json schema", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
		var payload map[string]any
		require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
		require.Contains(t, payload, "generated_at")
		require.Contains(t, payload, "since")
		require.Contains(t, payload, "until")
		require.Contains(t, payload, "channels")
		channels, ok := payload["channels"].([]any)
		require.True(t, ok)
		require.NotEmpty(t, channels)
		first, ok := channels[0].(map[string]any)
		require.True(t, ok)
		require.Contains(t, first, "channel_id")
		require.Contains(t, first, "channel_name")
		require.Contains(t, first, "guild_id")
		require.Contains(t, first, "days_silent")
		totals, ok := payload["totals"].(map[string]any)
		require.True(t, ok)
		require.Contains(t, totals, "channels")
	})

	t.Run("analytics quiet human output", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
		text := out.String()
		require.Contains(t, text, "CHANNEL")
		require.Contains(t, text, "stale")
		require.Contains(t, text, "Window:")
		require.Contains(t, text, "Totals: channels=")
	})

	t.Run("analytics quiet plain output", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--plain", "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
		require.Contains(t, out.String(), "c3\tstale\ttext\tg1\t")
	})

	t.Run("analytics trends json schema", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
		var payload map[string]any
		require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
		require.InEpsilon(t, 4, payload["weeks"], 0.001)
		require.Contains(t, payload, "rows")
		rows, ok := payload["rows"].([]any)
		require.True(t, ok)
		require.NotEmpty(t, rows)
		first, ok := rows[0].(map[string]any)
		require.True(t, ok)
		require.Contains(t, first, "channel_id")
		require.Contains(t, first, "channel_name")
		require.Contains(t, first, "weekly")
		// Checked assertions: a schema regression should fail the test with a
		// clear message instead of panicking.
		weekly, ok := first["weekly"].([]any)
		require.True(t, ok)
		require.Len(t, weekly, 4)
		weekRow, ok := weekly[0].(map[string]any)
		require.True(t, ok)
		require.Contains(t, weekRow, "week_start")
		require.Contains(t, weekRow, "messages")
	})

	t.Run("analytics trends human output", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
		text := out.String()
		require.Contains(t, text, "CHANNEL")
		require.Contains(t, text, "TOTAL")
		require.Contains(t, text, "general")
		require.Contains(t, text, "Window:")
	})

	t.Run("analytics trends plain output", func(t *testing.T) {
		var out bytes.Buffer
		require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--plain", "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
		require.Contains(t, out.String(), "g1\tc1\tgeneral\ttext\t")
	})

	t.Run("unknown analytics subcommand returns usage error", func(t *testing.T) {
		err := Run(ctx, []string{"--config", cfgPath, "analytics", "unknown-sub"}, &bytes.Buffer{}, &bytes.Buffer{})
		require.Error(t, err)
		require.Equal(t, 2, ExitCode(err))
	})

	// Despite its name, this subtest covers flag validation for both the
	// quiet and the trends subcommands.
	t.Run("quiet validates its own flags", func(t *testing.T) {
		cases := [][]string{
			{"--config", cfgPath, "analytics", "quiet", "--bogus"},
			{"--config", cfgPath, "analytics", "quiet", "extra"},
			{"--config", cfgPath, "analytics", "trends", "--bogus"},
			{"--config", cfgPath, "analytics", "trends", "--weeks", "-1"},
			{"--config", cfgPath, "analytics", "trends", "extra"},
		}
		for _, args := range cases {
			err := Run(ctx, args, &bytes.Buffer{}, &bytes.Buffer{})
			// Include the arguments so a failure identifies the offending case.
			require.Error(t, err, "args: %v", args)
			require.Equal(t, 2, ExitCode(err), "args: %v", args)
		}
	})
}
// seedAnalyticsCLIStore opens the store at path and fills it with the fixture
// used by the analytics CLI tests: guild g1, channels c1..c4 and one message
// each in c1, c2 and c3 (2 hours, 9 days and 45 days old). Channel c4 gets no
// message. The first failing store call is returned; the error from closing
// the store is deliberately discarded.
func seedAnalyticsCLIStore(ctx context.Context, path string) error {
	s, err := store.Open(ctx, path)
	if err != nil {
		return err
	}
	defer func() { _ = s.Close() }()

	now := time.Now().UTC()
	if err := s.UpsertGuild(ctx, store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}); err != nil {
		return err
	}

	// Channels are inserted in a fixed order; stop at the first failure.
	channels := []store.ChannelRecord{
		{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`},
		{ID: "c2", GuildID: "g1", Kind: "text", Name: "incidents", RawJSON: `{}`},
		{ID: "c3", GuildID: "g1", Kind: "text", Name: "stale", RawJSON: `{}`},
		{ID: "c4", GuildID: "g1", Kind: "forum", Name: "never", RawJSON: `{}`},
	}
	for _, channel := range channels {
		if err := s.UpsertChannel(ctx, channel); err != nil {
			return err
		}
	}

	// message builds one guild-g1 mutation whose timestamp lies age before now
	// and whose normalized content equals its content.
	message := func(id, channelID, channelName, authorID, authorName string, age time.Duration, content string) store.MessageMutation {
		return store.MessageMutation{
			Record: store.MessageRecord{
				ID:                id,
				GuildID:           "g1",
				ChannelID:         channelID,
				ChannelName:       channelName,
				AuthorID:          authorID,
				AuthorName:        authorName,
				CreatedAt:         now.Add(-age).Format(time.RFC3339Nano),
				Content:           content,
				NormalizedContent: content,
				RawJSON:           `{}`,
			},
		}
	}

	return s.UpsertMessages(ctx, []store.MessageMutation{
		message("m1", "c1", "general", "u1", "Alice", 2*time.Hour, "hello"),
		message("m2", "c2", "incidents", "u2", "Bob", 9*24*time.Hour, "incident"),
		message("m3", "c3", "stale", "u1", "Alice", 45*24*time.Hour, "old"),
	})
}

View File

@ -11,12 +11,12 @@ import (
"time"
"github.com/bwmarrin/discordgo"
"github.com/steipete/discrawl/internal/config"
"github.com/steipete/discrawl/internal/discord"
"github.com/steipete/discrawl/internal/embed"
"github.com/steipete/discrawl/internal/share"
"github.com/steipete/discrawl/internal/store"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/crawlkit/embed"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/discord"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
type cliError struct {
@ -28,10 +28,17 @@ func (e *cliError) Error() string {
return e.err.Error()
}
// Unwrap returns the error wrapped by e, which lets errors.Is and errors.As
// look through a cliError at the underlying cause.
func (e *cliError) Unwrap() error {
return e.err
}
func ExitCode(err error) int {
if err == nil {
return 0
}
if errors.Is(err, context.Canceled) {
return 1
}
var codeErr *cliError
if errors.As(err, &codeErr) {
return codeErr.code
@ -40,6 +47,10 @@ func ExitCode(err error) int {
}
func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
printUsage(stdout)
return nil
}
global := flag.NewFlagSet("discrawl", flag.ContinueOnError)
global.SetOutput(io.Discard)
configPath := global.String("config", "", "")
@ -59,10 +70,14 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
return nil
}
rest := global.Args()
if len(rest) == 0 || rest[0] == "help" {
if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
printUsage(stdout)
return nil
}
if rest[0] == "version" {
_, _ = io.WriteString(stdout, version+"\n")
return nil
}
level := slog.LevelInfo
if *quiet {
level = slog.LevelError
@ -83,22 +98,35 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
}
type runtime struct {
ctx context.Context
configPath string
cfg config.Config
stdout io.Writer
stderr io.Writer
json bool
plain bool
logger *slog.Logger
store *store.Store
client discordClient
syncer syncService
openStore func(context.Context, string) (*store.Store, error)
newDiscord func(config.Config) (discordClient, error)
newSyncer func(syncer.Client, *store.Store, *slog.Logger) syncService
newEmbed func(config.EmbeddingsConfig) (embed.Provider, error)
now func() time.Time
ctx context.Context
configPath string
cfg config.Config
stdout io.Writer
stderr io.Writer
json bool
plain bool
logger *slog.Logger
store *store.Store
client discordClient
syncer syncService
dbLockHeld bool
lockStarted time.Time
openStore func(context.Context, string) (*store.Store, error)
newDiscord func(config.Config) (discordClient, error)
newSyncer func(syncer.Client, *store.Store, *slog.Logger) syncService
newEmbed func(config.EmbeddingsConfig) (embed.Provider, error)
now func() time.Time
}
// crawlkitEmbeddingConfig copies the provider, model, base URL, API key
// environment variable name, request timeout and input-size limit from the
// discrawl embeddings configuration into a crawlkit embed.Config. Any other
// embed.Config fields keep their zero value.
func crawlkitEmbeddingConfig(cfg config.EmbeddingsConfig) embed.Config {
	var out embed.Config
	out.Provider = cfg.Provider
	out.Model = cfg.Model
	out.BaseURL = cfg.BaseURL
	out.APIKeyEnv = cfg.APIKeyEnv
	out.RequestTimeout = cfg.RequestTimeout
	out.MaxInputChars = cfg.MaxInputChars
	return out
}
type discordClient interface {
@ -120,36 +148,65 @@ type attachmentTextConfigurer interface {
func (r *runtime) dispatch(rest []string) error {
switch rest[0] {
case "metadata":
return r.runMetadata(rest[1:])
case "init":
return r.runInit(rest[1:])
case "sync":
return r.withServicesAuto(true, true, func() error { return r.runSync(rest[1:]) })
updateMode, err := syncShareUpdateMode(rest[1:])
if err != nil {
return usageErr(err)
}
return r.withLocalStoreUpdateLocked(updateMode, true, func() error { return r.runSync(rest[1:]) })
case "tail":
return r.withServices(true, func() error { return r.runTail(rest[1:]) })
return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) })
case "wiretap":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "tap", "cache-import":
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
case "search":
return r.withServices(false, func() error { return r.runSearch(rest[1:]) })
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
return r.withLocalStoreRead(autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
case "tui":
if hasHelpArg(rest[1:]) {
return r.runTUI(rest[1:])
}
return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) })
case "messages":
return r.withServicesAuto(hasBoolFlag(rest[1:], "--sync"), true, func() error { return r.runMessages(rest[1:]) })
if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") {
return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) })
}
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
return r.withLocalStoreRead(autoShareUpdate, func() error { return r.runMessages(rest[1:]) })
case "digest":
return r.withLocalStoreRead(true, func() error { return r.runDigest(rest[1:]) })
case "analytics":
return r.runAnalytics(rest[1:])
case "dms":
return r.withLocalStoreRead(false, func() error { return r.runDirectMessages(rest[1:]) })
case "mentions":
return r.withServices(false, func() error { return r.runMentions(rest[1:]) })
return r.withLocalStoreRead(true, func() error { return r.runMentions(rest[1:]) })
case "embed":
return r.withServices(false, func() error { return r.runEmbed(rest[1:]) })
return r.withLocalStoreLocked(true, func() error { return r.runEmbed(rest[1:]) })
case "sql":
return r.withServices(false, func() error { return r.runSQL(rest[1:]) })
if boolFlagEnabled(rest[1:], "--unsafe") {
return r.withLocalStoreLocked(true, func() error { return r.runSQL(rest[1:]) })
}
return r.withLocalStoreRead(true, func() error { return r.runSQL(rest[1:]) })
case "members":
return r.withServices(false, func() error { return r.runMembers(rest[1:]) })
return r.withLocalStoreRead(true, func() error { return r.runMembers(rest[1:]) })
case "channels":
return r.withServices(false, func() error { return r.runChannels(rest[1:]) })
return r.withLocalStoreRead(true, func() error { return r.runChannels(rest[1:]) })
case "status":
return r.withServices(false, func() error { return r.runStatus(rest[1:]) })
return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) })
case "report":
return r.withServices(false, func() error { return r.runReport(rest[1:]) })
return r.withLocalStoreRead(true, func() error { return r.runReport(rest[1:]) })
case "publish":
return r.withServicesAuto(false, false, func() error { return r.runPublish(rest[1:]) })
return r.withServicesAutoLocked(false, false, true, func() error { return r.runPublish(rest[1:]) })
case "subscribe":
return r.runSubscribe(rest[1:])
case "update":
return r.withServicesAuto(false, false, func() error { return r.runUpdate(rest[1:]) })
return r.withServicesAutoLocked(false, false, true, func() error { return r.runUpdate(rest[1:]) })
case "doctor":
return r.runDoctor(rest[1:])
default:
@ -161,7 +218,205 @@ func (r *runtime) withServices(withDiscord bool, fn func() error) error {
return r.withServicesAuto(withDiscord, !withDiscord, fn)
}
// withServicesLocked runs fn through withServicesAutoLocked with database
// locking requested. Share auto-update is requested only when Discord is not
// needed (autoShareUpdate is the negation of withDiscord).
func (r *runtime) withServicesLocked(withDiscord bool, fn func() error) error {
	const lockDB = true
	autoShareUpdate := !withDiscord
	return r.withServicesAutoLocked(withDiscord, autoShareUpdate, lockDB, fn)
}
// withLocalStoreLocked converts autoShareUpdate into a share update mode via
// boolShareUpdateMode and runs fn through withLocalStoreUpdateLocked with
// database locking requested.
func (r *runtime) withLocalStoreLocked(autoShareUpdate bool, fn func() error) error {
	const lockDB = true
	mode := boolShareUpdateMode(autoShareUpdate)
	return r.withLocalStoreUpdateLocked(mode, lockDB, fn)
}
// withLocalStoreRead converts autoShareUpdate into a share update mode via
// boolShareUpdateMode and runs fn through withLocalStoreReadUpdate.
func (r *runtime) withLocalStoreRead(autoShareUpdate bool, fn func() error) error {
	mode := boolShareUpdateMode(autoShareUpdate)
	return r.withLocalStoreReadUpdate(mode, fn)
}
// withLocalStoreReadUpdate prepares the runtime for a read command and runs fn
// against a read-only store.
//
// The configuration is loaded from r.configPath; when the file does not exist
// the normalized default configuration is used instead. Runtime directories
// are ensured, the database path is expanded and the configuration is stored
// on r. If shouldAutoUpdateShare accepts updateMode, a share update is
// attempted via autoUpdateShareIfLockAvailable before the store is opened
// with openLocalStoreReadOnly.
//
// Configuration failures are returned wrapped by configErr.
func (r *runtime) withLocalStoreReadUpdate(updateMode shareUpdateMode, fn func() error) error {
	cfg, loadErr := config.Load(r.configPath)
	switch {
	case loadErr == nil:
		// Loaded configuration is used as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		// No config file: fall back to normalized defaults.
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return configErr(normErr)
		}
	default:
		return configErr(loadErr)
	}

	if dirErr := config.EnsureRuntimeDirs(cfg); dirErr != nil {
		return configErr(dirErr)
	}
	dbPath, pathErr := config.ExpandPath(cfg.DBPath)
	if pathErr != nil {
		return configErr(pathErr)
	}
	r.cfg = cfg

	if r.shouldAutoUpdateShare(updateMode) {
		updateErr := r.autoUpdateShareIfLockAvailable(dbPath, updateMode)
		if updateErr != nil {
			return updateErr
		}
	}
	return r.openLocalStoreReadOnly(dbPath, fn)
}
// withLocalStoreUpdateLocked prepares the runtime for a command that works on
// the local store and runs fn through openLocalStore.
//
// The configuration is loaded from r.configPath; when the file does not exist
// the normalized default configuration is used instead. Runtime directories
// are ensured, the database path is expanded and the configuration is stored
// on r. When lockDB is true the store is opened inside withSyncLock;
// otherwise it is opened directly.
//
// Configuration failures are returned wrapped by configErr.
func (r *runtime) withLocalStoreUpdateLocked(updateMode shareUpdateMode, lockDB bool, fn func() error) error {
	cfg, loadErr := config.Load(r.configPath)
	switch {
	case loadErr == nil:
		// Loaded configuration is used as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		// No config file: fall back to normalized defaults.
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return configErr(normErr)
		}
	default:
		return configErr(loadErr)
	}

	if dirErr := config.EnsureRuntimeDirs(cfg); dirErr != nil {
		return configErr(dirErr)
	}
	dbPath, pathErr := config.ExpandPath(cfg.DBPath)
	if pathErr != nil {
		return configErr(pathErr)
	}
	r.cfg = cfg

	open := func() error {
		return r.openLocalStore(dbPath, updateMode, fn)
	}
	if !lockDB {
		return open()
	}
	return r.withSyncLock(open)
}
// shouldAutoUpdateShare reports whether a share update should be attempted
// for mode. It is false when the DISCRAWL_NO_AUTO_UPDATE environment variable
// equals "1" or when sharing is not enabled in the configuration. Otherwise
// it is true for shareUpdateForce and shareUpdateAuto, and for
// shareUpdateConfigured only when the configuration enables Share.AutoUpdate.
func (r *runtime) shouldAutoUpdateShare(mode shareUpdateMode) bool {
	if os.Getenv("DISCRAWL_NO_AUTO_UPDATE") == "1" {
		return false
	}
	if !r.cfg.ShareEnabled() {
		return false
	}
	if mode == shareUpdateForce || mode == shareUpdateAuto {
		return true
	}
	return mode == shareUpdateConfigured && r.cfg.Share.AutoUpdate
}
// autoUpdateShareIfLockAvailable runs a share update under tryWithSyncLock.
//
// Inside the lock the store at dbPath is opened with r.openStore (store.Open
// when that is nil), autoUpdateShare is run for updateMode, and the store is
// closed and cleared from r again. When tryWithSyncLock reports that the
// callback did not run, the update is skipped and an info message is logged.
// An error from tryWithSyncLock is returned unchanged; a failure to open the
// store is wrapped by dbErr.
func (r *runtime) autoUpdateShareIfLockAvailable(dbPath string, updateMode shareUpdateMode) error {
	update := func() error {
		open := r.openStore
		if open == nil {
			open = store.Open
		}
		var err error
		if r.store, err = open(r.ctx, dbPath); err != nil {
			return dbErr(err)
		}
		// The store is only needed for the update itself; release it afterwards.
		defer func() {
			_ = r.store.Close()
			r.store = nil
		}()
		return r.autoUpdateShare(updateMode)
	}

	locked, err := r.tryWithSyncLock(update)
	switch {
	case err != nil:
		return err
	case !locked:
		r.logger.Info("share update skipped; sync lock is held")
	}
	return nil
}
// openLocalStore opens the store at dbPath with r.openStore (store.Open when
// that is nil), optionally runs a share update, and then calls fn. The store
// is closed when the function returns; the close error is discarded.
//
// The share update is attempted unless updateMode is shareUpdateNever or the
// DISCRAWL_NO_AUTO_UPDATE environment variable equals "1". A failure to open
// the store is wrapped by dbErr; an update error is returned as-is and
// prevents fn from running.
func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn func() error) error {
	var err error
	if r.openStore != nil {
		r.store, err = r.openStore(r.ctx, dbPath)
	} else {
		r.store, err = store.Open(r.ctx, dbPath)
	}
	if err != nil {
		return dbErr(err)
	}
	defer func() { _ = r.store.Close() }()

	updateAllowed := updateMode != shareUpdateNever && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1"
	if updateAllowed {
		if updateErr := r.autoUpdateShare(updateMode); updateErr != nil {
			return updateErr
		}
	}
	return fn()
}
// withLocalStoreReadOnly loads the configuration, opens the database
// read-only and runs fn.
//
// The configuration is loaded from r.configPath; when the file does not exist
// the normalized default configuration is used instead. When the database
// itself does not exist, fn still runs, with r.store set to nil. Any other
// open failure is wrapped by dbErr, and configuration failures by configErr.
// A store that was opened is closed when the function returns.
func (r *runtime) withLocalStoreReadOnly(fn func() error) error {
	cfg, loadErr := config.Load(r.configPath)
	switch {
	case loadErr == nil:
		// Loaded configuration is used as-is.
	case errors.Is(loadErr, os.ErrNotExist):
		// No config file: fall back to normalized defaults.
		cfg = config.Default()
		if normErr := cfg.Normalize(); normErr != nil {
			return configErr(normErr)
		}
	default:
		return configErr(loadErr)
	}

	dbPath, pathErr := config.ExpandPath(cfg.DBPath)
	if pathErr != nil {
		return configErr(pathErr)
	}
	r.cfg = cfg

	var openErr error
	r.store, openErr = store.OpenReadOnly(r.ctx, dbPath)
	switch {
	case openErr == nil:
		defer func() { _ = r.store.Close() }()
		return fn()
	case errors.Is(openErr, os.ErrNotExist):
		// No database yet: let fn run without a store.
		r.store = nil
		return fn()
	default:
		return dbErr(openErr)
	}
}
// openLocalStoreReadOnly opens the store at dbPath read-only and runs fn,
// closing the store afterwards (the close error is discarded).
//
// Two failures of the read-only open are handled before giving up:
//   - os.ErrNotExist: the store is opened through r.openStore (store.Open
//     when that is nil) and fn runs against that handle.
//   - store.ErrSchemaVersionMismatch: under withSyncLock the store is opened
//     through the same factory and closed again, then the read-only open is
//     retried once.
//
// Any open error that remains is returned wrapped by dbErr. An error from
// withSyncLock is returned as-is.
func (r *runtime) openLocalStoreReadOnly(dbPath string, fn func() error) error {
	// openWritable opens the store through the configured factory.
	openWritable := func() (*store.Store, error) {
		if r.openStore != nil {
			return r.openStore(r.ctx, dbPath)
		}
		return store.Open(r.ctx, dbPath)
	}
	// runWithStore runs fn and closes whatever store r holds afterwards.
	runWithStore := func() error {
		defer func() { _ = r.store.Close() }()
		return fn()
	}

	var err error
	if r.store, err = store.OpenReadOnly(r.ctx, dbPath); err == nil {
		return runWithStore()
	}

	if errors.Is(err, os.ErrNotExist) {
		if r.store, err = openWritable(); err == nil {
			return runWithStore()
		}
	}

	if errors.Is(err, store.ErrSchemaVersionMismatch) {
		migrate := func() error {
			var migrateErr error
			if r.store, migrateErr = openWritable(); migrateErr != nil {
				return dbErr(migrateErr)
			}
			closeErr := r.store.Close()
			r.store = nil
			return closeErr
		}
		if lockErr := r.withSyncLock(migrate); lockErr != nil {
			return lockErr
		}
		if r.store, err = store.OpenReadOnly(r.ctx, dbPath); err == nil {
			return runWithStore()
		}
	}

	return dbErr(err)
}
// withServicesAuto runs fn through withServicesAutoLocked without requesting
// the database lock.
func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error {
	const lockDB = false
	return r.withServicesAutoLocked(withDiscord, autoShareUpdate, lockDB, fn)
}
// withServicesAutoLocked converts autoShareUpdate into a share update mode via
// boolShareUpdateMode and runs fn through withServicesUpdateLocked, passing
// withDiscord and lockDB along unchanged.
func (r *runtime) withServicesAutoLocked(withDiscord, autoShareUpdate, lockDB bool, fn func() error) error {
	mode := boolShareUpdateMode(autoShareUpdate)
	return r.withServicesUpdateLocked(withDiscord, mode, lockDB, fn)
}
func (r *runtime) withServicesUpdateLocked(withDiscord bool, updateMode shareUpdateMode, lockDB bool, fn func() error) error {
cfg, err := config.Load(r.configPath)
if err != nil {
return configErr(err)
@ -174,68 +429,91 @@ func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func()
return configErr(err)
}
r.cfg = cfg
if lockDB {
return r.withSyncLock(func() error {
return r.openServices(dbPath, withDiscord, updateMode, fn)
})
}
return r.openServices(dbPath, withDiscord, updateMode, fn)
}
func (r *runtime) openServices(dbPath string, withDiscord bool, updateMode shareUpdateMode, fn func() error) error {
storeFactory := r.openStore
if storeFactory == nil {
storeFactory = store.Open
}
var err error
r.store, err = storeFactory(r.ctx, dbPath)
if err != nil {
return dbErr(err)
}
defer func() { _ = r.store.Close() }()
if autoShareUpdate && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
if err := r.autoUpdateShare(); err != nil {
if updateMode != shareUpdateNever && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
if err := r.autoUpdateShare(updateMode); err != nil {
return err
}
}
if withDiscord {
discordFactory := r.newDiscord
if discordFactory == nil {
discordFactory = func(cfg config.Config) (discordClient, error) {
token, err := config.ResolveDiscordToken(cfg)
if err != nil {
return nil, err
}
return discord.New(token.Token)
}
if err := r.ensureDiscordServices(); err != nil {
return err
}
r.client, err = discordFactory(cfg)
if err != nil {
return authErr(err)
}
defer func() { _ = r.client.Close() }()
syncerFactory := r.newSyncer
if syncerFactory == nil {
syncerFactory = func(client syncer.Client, s *store.Store, logger *slog.Logger) syncService {
return syncer.New(client, s, logger)
}
}
r.syncer = syncerFactory(r.client, r.store, r.logger)
if configurable, ok := r.syncer.(attachmentTextConfigurer); ok {
configurable.SetAttachmentTextEnabled(cfg.AttachmentTextEnabled())
if r.client != nil {
defer func() { _ = r.client.Close() }()
}
}
return fn()
}
func (r *runtime) autoUpdateShare() error {
if !r.cfg.ShareEnabled() || !r.cfg.Share.AutoUpdate {
// ensureDiscordServices creates the Discord client and the syncer and stores
// them on r.
//
// The client comes from r.newDiscord when set; otherwise the token is resolved
// from the configuration and passed to discord.New. Any failure while
// creating the client is returned wrapped by authErr, and r is left
// untouched. The syncer comes from r.newSyncer when set, otherwise from
// syncer.New. If the syncer implements attachmentTextConfigurer, attachment
// text handling is set from the configuration.
func (r *runtime) ensureDiscordServices() error {
	var (
		client discordClient
		err    error
	)
	if r.newDiscord != nil {
		client, err = r.newDiscord(r.cfg)
	} else {
		token, tokenErr := config.ResolveDiscordToken(r.cfg)
		if tokenErr != nil {
			err = tokenErr
		} else {
			client, err = discord.New(token.Token)
		}
	}
	if err != nil {
		return authErr(err)
	}
	r.client = client

	if r.newSyncer != nil {
		r.syncer = r.newSyncer(r.client, r.store, r.logger)
	} else {
		r.syncer = syncer.New(r.client, r.store, r.logger)
	}

	configurable, ok := r.syncer.(attachmentTextConfigurer)
	if ok {
		configurable.SetAttachmentTextEnabled(r.cfg.AttachmentTextEnabled())
	}
	return nil
}
func (r *runtime) autoUpdateShare(mode shareUpdateMode) error {
if !r.cfg.ShareEnabled() || (mode == shareUpdateConfigured && !r.cfg.Share.AutoUpdate) {
return nil
}
staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter)
if err != nil {
return configErr(fmt.Errorf("invalid share.stale_after: %w", err))
}
if !share.NeedsImport(r.ctx, r.store, staleAfter) {
if mode != shareUpdateForce && !share.NeedsImport(r.ctx, r.store, staleAfter) {
return nil
}
opts, err := r.shareOptions()
if err != nil {
return err
}
r.setSyncLockPhase("share pull")
r.logger.Info("share update pulling", "repo_path", opts.RepoPath, "remote", opts.Remote)
if err := share.Pull(r.ctx, opts); err != nil {
return err
}
r.setSyncLockPhase("share import")
_, _, err = share.ImportIfChanged(r.ctx, r.store, opts)
if errors.Is(err, share.ErrNoManifest) {
return nil
@ -252,5 +530,6 @@ func (r *runtime) shareOptions() (share.Options, error) {
RepoPath: repoPath,
Remote: r.cfg.Share.Remote,
Branch: r.cfg.Share.Branch,
Progress: r.shareProgress,
}, nil
}

View File

@ -4,12 +4,15 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"io"
"log/slog"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
goruntime "runtime"
"testing"
"time"
@ -17,11 +20,13 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/steipete/discrawl/internal/config"
discordclient "github.com/steipete/discrawl/internal/discord"
"github.com/steipete/discrawl/internal/share"
"github.com/steipete/discrawl/internal/store"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/discrawl/internal/config"
discordclient "github.com/openclaw/discrawl/internal/discord"
"github.com/openclaw/discrawl/internal/discorddesktop"
"github.com/openclaw/discrawl/internal/report"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
func TestHelpAndVersion(t *testing.T) {
@ -33,10 +38,196 @@ func TestHelpAndVersion(t *testing.T) {
out.Reset()
require.NoError(t, Run(context.Background(), []string{"--version"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "0.4.1")
require.Contains(t, out.String(), "0.7.0")
err := Run(context.Background(), []string{"bogus"}, &out, &bytes.Buffer{})
require.Equal(t, 2, ExitCode(err))
require.Equal(t, 1, ExitCode(context.Canceled))
require.Equal(t, 7, ExitCode(&cliError{code: 7, err: errors.New("custom")}))
}
// TestCommandValidationEdges checks that Run returns an error for every
// argument vector in the table below. The test writes a config with token
// source "none" pointing at a database that has been opened once and closed
// again, so each case runs against an existing store with no rows added.
func TestCommandValidationEdges(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "discrawl.db")
cfg := config.Default()
cfg.DBPath = dbPath
cfg.Discord.TokenSource = "none"
require.NoError(t, config.Write(cfgPath, cfg))
// Create the database file up front; no rows are inserted.
s, err := store.Open(ctx, dbPath)
require.NoError(t, err)
require.NoError(t, s.Close())
// Each entry is a full command line that must be rejected.
cases := [][]string{
{"--config", cfgPath, "--bogus"},
{"--config", cfgPath, "search"},
{"--config", cfgPath, "search", "--mode", "bogus", "term"},
{"--config", cfgPath, "messages"},
{"--config", cfgPath, "messages", "--hours", "-1", "--channel", "general"},
{"--config", cfgPath, "messages", "--hours", "1", "--days", "1", "--channel", "general"},
{"--config", cfgPath, "messages", "--all", "--last", "1", "--channel", "general"},
{"--config", cfgPath, "messages", "--dm", "--sync", "--channel", "alice"},
{"--config", cfgPath, "dms", "--hours", "-1"},
{"--config", cfgPath, "dms", "--limit", "1", "--last", "1", "--with", "alice"},
{"--config", cfgPath, "mentions"},
{"--config", cfgPath, "mentions", "--days", "-1", "--target", "u1"},
{"--config", cfgPath, "mentions", "--type", "channel", "--target", "u1"},
{"--config", cfgPath, "digest", "--since", "-1d"},
{"--config", cfgPath, "analytics", "wat"},
{"--config", cfgPath, "analytics", "quiet", "extra"},
{"--config", cfgPath, "analytics", "trends", "--weeks", "-1"},
{"--config", cfgPath, "channels"},
{"--config", cfgPath, "channels", "wat"},
{"--config", cfgPath, "channels", "show"},
{"--config", cfgPath, "status", "extra"},
{"--config", cfgPath, "report", "extra"},
{"--config", cfgPath, "wiretap", "extra"},
{"--config", cfgPath, "wiretap", "--max-file-bytes", "0"},
{"--config", cfgPath, "sync", "--source", "bogus"},
{"--config", cfgPath, "sync", "--since", "not-time"},
{"--config", cfgPath, "sync", "--no-update", "--update", "force"},
{"--config", cfgPath, "publish", "--remote", ""},
{"--config", cfgPath, "subscribe"},
{"--config", cfgPath, "update", "extra"},
{"--config", cfgPath, "sql", "--confirm", "select 1"},
{"--config", cfgPath, "sql", "--unsafe", "select 1"},
{"--config", cfgPath, "members"},
{"--config", cfgPath, "members", "wat"},
}
for _, args := range cases {
var stdout, stderr bytes.Buffer
err := Run(ctx, args, &stdout, &stderr)
// args is passed as the failure message so the offending case is visible.
require.Error(t, err, args)
}
}
// TestOutputBranches feeds one value of each listed result type to printHuman
// and requires a nil error and non-empty output for every one of them. It
// then checks that printPlain accepts report.Quiet and report.Trends, that
// both printers reject an unsupported value (struct{}{}), and that
// trimForTable shortens a long string to the expected "..."-terminated form.
func TestOutputBranches(t *testing.T) {
// Fixed reference time so the fixture values are deterministic.
now := time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC)
values := []any{
syncRunStats{
Source: "both",
Discord: &syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5},
Wiretap: &discorddesktop.Stats{
Path: "/tmp/discord",
FilesVisited: 1,
FilesScanned: 2,
FilesSkipped: 3,
FilesUnchanged: 4,
CacheFilesFastSkipped: 5,
JSONObjects: 6,
Guilds: 7,
Channels: 8,
Messages: 9,
DMMessages: 10,
DMChannels: 11,
GuildMessages: 12,
SkippedMessages: 13,
SkippedChannels: 14,
Checkpoints: 15,
FullCache: true,
DryRun: true,
},
},
syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5},
discorddesktop.Stats{Path: "/tmp/discord", FilesVisited: 1, FullCache: true, DryRun: true},
store.EmbeddingDrainStats{
Processed: 3,
Succeeded: 2,
Failed: 1,
Requeued: 4,
RateLimited: true,
RemainingBacklog: 5,
Provider: "openai",
Model: "model",
InputVersion: "v1",
},
[]store.DirectMessageConversationRow{{
ChannelID: "c1",
Name: "Alice",
MessageCount: 2,
AuthorCount: 1,
FirstMessageAt: now.Add(-time.Hour),
LastMessageAt: now,
}},
store.MemberProfile{
Member: store.MemberRow{
GuildID: "g1",
UserID: "u1",
Username: "peter",
DisplayName: "Peter",
JoinedAt: now,
XHandle: "steipete",
GitHubLogin: "steipete",
Website: "https://steipete.me",
Pronouns: "he/him",
Location: "Vienna",
Bio: "Maintainer",
URLs: []string{"https://example.com"},
},
MessageCount: 1,
FirstMessageAt: now.Add(-time.Hour),
LastMessageAt: now,
RecentMessages: []store.MessageRow{{ChannelName: "general", CreatedAt: now, Content: "hello"}},
},
report.Digest{
Since: now.Add(-24 * time.Hour),
Until: now,
WindowLabel: "1d",
Channels: []report.ChannelDigest{{
ChannelID: "c1",
ChannelName: "general",
Kind: "text",
GuildID: "g1",
Messages: 3,
Replies: 1,
ActiveAuthors: 2,
TopPosters: []report.RankedCount{{Name: "Peter", Count: 2}},
TopMentions: []report.RankedCount{{Count: 1}},
}},
Totals: report.DigestTotals{Messages: 3, Replies: 1, Channels: 1, ActiveAuthors: 2},
},
report.Quiet{
Since: now.Add(-24 * time.Hour),
Until: now,
Channels: []report.QuietChannel{{
ChannelID: "c1",
ChannelName: "general",
Kind: "text",
LastMessage: "",
DaysSilent: -1,
}},
Totals: report.QuietTotals{Channels: 1},
},
report.Trends{
Since: now.AddDate(0, 0, -14),
Until: now,
Weeks: 2,
Rows: []report.TrendsRow{{
ChannelID: "c1",
ChannelName: "general",
Kind: "text",
GuildID: "g1",
Weekly: []report.WeeklyCount{
{WeekStart: now.AddDate(0, 0, -14), Messages: 1},
{WeekStart: now.AddDate(0, 0, -7), Messages: 2},
},
}},
},
map[string]any{"b": 2, "a": 1},
}
// Every supported value must render without error and produce some text.
for _, value := range values {
var out bytes.Buffer
require.NoError(t, printHuman(&out, value))
require.NotEmpty(t, out.String())
}
// Plain output is only checked for success here, not for its content.
var plain bytes.Buffer
require.NoError(t, printPlain(&plain, report.Quiet{Channels: []report.QuietChannel{{ChannelID: "c1", ChannelName: "general", Kind: "text", GuildID: "g1", LastMessage: "now", DaysSilent: 0}}}))
require.NoError(t, printPlain(&plain, report.Trends{Rows: []report.TrendsRow{{GuildID: "g1", ChannelID: "c1", ChannelName: "general", Kind: "text", Weekly: []report.WeeklyCount{{WeekStart: now, Messages: 2}}}}}))
// An empty struct is not a supported value for either printer.
require.Error(t, printPlain(io.Discard, struct{}{}))
require.Error(t, printHuman(io.Discard, struct{}{}))
require.Equal(t, "this is a profile field with a very l...", trimForTable("this is a profile field with a very long text value"))
}
func TestStatusSearchSQLAndListings(t *testing.T) {
@ -75,6 +266,21 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
NormalizedContent: "panic locked database",
RawJSON: `{}`,
}))
require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: "g2", Name: "Other Guild", RawJSON: `{}`}))
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "random", RawJSON: `{}`}))
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
ID: "m-other",
GuildID: "g2",
ChannelID: "c2",
ChannelName: "random",
AuthorID: "u2",
AuthorName: "Outside",
MessageType: 0,
CreatedAt: time.Now().UTC().Add(-time.Hour).Format(time.RFC3339Nano),
Content: "outside default guild",
NormalizedContent: "outside default guild",
RawJSON: `{}`,
}))
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
ID: "m2",
GuildID: "g1",
@ -119,6 +325,7 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
tests := [][]string{
{"--config", cfgPath, "status"},
{"--config", cfgPath, "search", "panic"},
{"--config", cfgPath, "search", "panic", "--limit", "1"},
{"--config", cfgPath, "search", "stack"},
{"--config", cfgPath, "search", "--include-empty", "Peter"},
{"--config", cfgPath, "messages", "--channel", "general", "--days", "7", "--all"},
@ -136,6 +343,290 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
require.NotEmpty(t, out.String())
}
for _, args := range [][]string{
{"--config", cfgPath, "metadata", "--json"},
{"--config", cfgPath, "status", "--json"},
} {
var out bytes.Buffer
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
var payload map[string]any
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
require.NotEmpty(t, payload)
}
before, err := os.ReadFile(dbPath)
require.NoError(t, err)
var out bytes.Buffer
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "tui", "--limit", "5"}, &out, &bytes.Buffer{}))
var rows []map[string]any
require.NoError(t, json.Unmarshal(out.Bytes(), &rows))
require.NotEmpty(t, rows)
require.Equal(t, "panic locked database", rows[0]["title"])
require.Equal(t, "discord", rows[0]["source"])
require.Equal(t, "message", rows[0]["kind"])
require.Equal(t, "Guild", rows[0]["scope"])
require.Equal(t, "general", rows[0]["container"])
require.Equal(t, "https://discord.com/channels/g1/c1/m1", rows[0]["url"])
after, err := os.ReadFile(dbPath)
require.NoError(t, err)
require.Equal(t, before, after, "tui --json should not mutate the database")
}
func TestTUIHelpReturnsUsage(t *testing.T) {
var stdout bytes.Buffer
var stderr bytes.Buffer
require.NoError(t, Run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr))
require.Contains(t, stdout.String(), "Usage of tui:")
require.Contains(t, stdout.String(), "-limit")
require.Contains(t, stdout.String(), "right-click")
require.Contains(t, stdout.String(), "# jump")
require.Empty(t, stderr.String())
}
// TestControlStatusIncludesShareAndFileSizes checks that controlStatus
// reports the on-disk sizes of the database file and its "-wal" companion,
// includes an enabled share section flagged as needing an update, and
// mentions the message count in its summary. It also checks that fileSize
// returns zero for a path that does not exist.
func TestControlStatusIncludesShareAndFileSizes(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "discrawl.db")
// 2-byte database file and 3-byte WAL file; the sizes are asserted below.
require.NoError(t, os.WriteFile(dbPath, []byte("db"), 0o600))
require.NoError(t, os.WriteFile(dbPath+"-wal", []byte("wal"), 0o600))
cfg := config.Default()
cfg.DBPath = dbPath
cfg.Share.Remote = "https://github.com/openclaw/discrawl-share.git"
cfg.Share.RepoPath = filepath.Join(dir, "share")
status := store.Status{
DBPath: dbPath,
MessageCount: 5,
ChannelCount: 2,
}
out := controlStatus(filepath.Join(dir, "config.toml"), cfg, status, true)
require.Equal(t, int64(2), out.DatabaseBytes)
require.Equal(t, int64(3), out.WALBytes)
require.Zero(t, fileSize(filepath.Join(dir, "missing.db")))
require.NotNil(t, out.Share)
require.True(t, out.Share.Enabled)
require.True(t, out.Share.NeedsUpdate)
require.Contains(t, out.Summary, "5 messages")
}
// TestFormattingAndTUISourceBranches pins the output of the small formatting
// helpers (formatDaysSilent, formatWindowDuration, mustDuration) and then
// walks a runtime through three configuration states to check
// archiveSourceKind, archiveSourceLocation and resolveTUIGuilds. The
// assertions depend on the order in which r.cfg is mutated.
func TestFormattingAndTUISourceBranches(t *testing.T) {
require.Equal(t, "-", formatDaysSilent(-1))
require.Equal(t, "4", formatDaysSilent(4))
require.Equal(t, "0", formatWindowDuration(0))
require.Equal(t, "2d", formatWindowDuration(48*time.Hour))
require.Equal(t, "3h", formatWindowDuration(3*time.Hour))
require.Equal(t, "1h30m0s", formatWindowDuration(90*time.Minute))
// An unparsable duration string yields 6h here.
require.Equal(t, 6*time.Hour, mustDuration("bogus"))
require.Equal(t, 15*time.Minute, mustDuration("15m"))
cfg := config.Default()
cfg.DBPath = "/tmp/discrawl.db"
r := &runtime{cfg: cfg}
// State 1: only DBPath set on top of the defaults -> local source, no guilds.
require.Equal(t, "local", r.archiveSourceKind())
require.Equal(t, cfg.DBPath, r.archiveSourceLocation())
guilds, err := r.resolveTUIGuilds(false, "", "")
require.NoError(t, err)
require.Empty(t, guilds)
// State 2: a default guild is configured and becomes the only resolved guild.
r.cfg.DefaultGuildID = "guild-one"
guilds, err = r.resolveTUIGuilds(false, "", "")
require.NoError(t, err)
require.Equal(t, []string{"guild-one"}, guilds)
// State 3: a share remote is configured -> remote source located at that URL.
r.cfg.Share.Remote = "https://github.com/openclaw/discrawl-share.git"
require.Equal(t, "remote", r.archiveSourceKind())
require.Equal(t, r.cfg.Share.Remote, r.archiveSourceLocation())
}
// TestWiretapImportsDesktopDirectMessages runs the "wiretap" command against a
// fake desktop cache directory containing one DM channel object and one
// message object, then checks that the imported message is visible through
// "search", "dms", "search --dm" and "messages --dm". The commands run in
// sequence against the same config and database, so their order matters.
func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "discrawl.db")
desktopPath := filepath.Join(dir, "discord")
require.NoError(t, os.MkdirAll(filepath.Join(desktopPath, "IndexedDB"), 0o755))
// Fixture: two newline-separated JSON objects, a channel with one recipient
// and a message in that channel authored by the same user.
require.NoError(t, os.WriteFile(filepath.Join(desktopPath, "IndexedDB", "000001.log"), []byte(`{"id":"111111111111111111","type":1,"recipients":[{"id":"222222222222222222","username":"alice","global_name":"Alice"}]}
{"id":"333333333333333333","channel_id":"111111111111111111","content":"secret DM launch plan","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222222","username":"alice","global_name":"Alice"}}`), 0o600))
cfg := config.Default()
cfg.DBPath = dbPath
cfg.Desktop.Path = desktopPath
cfg.Discord.TokenSource = "none"
require.NoError(t, config.Write(cfgPath, cfg))
var out bytes.Buffer
// Import the desktop cache; exactly one message is expected.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "wiretap"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "messages=1")
out.Reset()
// Plain search finds the DM and its output contains "@me".
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "launch"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "secret DM launch plan")
require.Contains(t, out.String(), "@me")
out.Reset()
// The conversation listing shows the recipient name and the channel ID.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "dms"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "Alice")
require.Contains(t, out.String(), "111111111111111111")
out.Reset()
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "dms", "--with", "Alice", "--last", "1"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "secret DM launch plan")
require.Contains(t, out.String(), "@me")
out.Reset()
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "--dm", "launch"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "secret DM launch plan")
out.Reset()
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "messages", "--dm", "--channel", "Alice", "--last", "1"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "secret DM launch plan")
}
func TestDiscordTUIRowsIncludePaneMetadata(t *testing.T) {
rows := discordTUIRows([]store.MessageRow{{
MessageID: "m1",
GuildID: "@me",
GuildName: "Discord Direct Messages",
ChannelID: "c1",
ChannelName: "Vincent K",
AuthorID: "u1",
AuthorName: "Peter",
Content: "hello from desktop",
DisplayContent: "hello from Vincent",
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
ReplyToMessage: "m0",
HasAttachments: true,
AttachmentNames: "trace.txt",
AttachmentText: "stack trace line one",
Pinned: true,
}})
require.Len(t, rows, 1)
require.Equal(t, "hello from Vincent", rows[0].Title)
require.Contains(t, rows[0].Detail, "hello from Vincent")
require.Contains(t, rows[0].Detail, "Attachments")
require.Contains(t, rows[0].Detail, "stack trace line one")
require.Equal(t, "hello from Vincent", rows[0].Text)
require.Equal(t, "Direct messages", rows[0].Scope)
require.Equal(t, "Vincent K", rows[0].Container)
require.Contains(t, rows[0].Tags, "dm")
require.Equal(t, "true", rows[0].Fields["attachments"])
require.Equal(t, "trace.txt", rows[0].Fields["attachment_names"])
require.Equal(t, "true", rows[0].Fields["pinned"])
require.Equal(t, "m0", rows[0].Fields["reply_to"])
require.Equal(t, "@me", rows[0].Fields["guild_id"])
rows = discordTUIRows([]store.MessageRow{{
MessageID: "m2",
GuildID: "g1",
ChannelID: "c2",
AuthorID: "439223656200273932",
Content: "desktop-only author",
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
Source: "discord_desktop",
}})
require.Equal(t, "user:439223...3932", rows[0].Author)
require.Equal(t, "DM c2", discordContainerLabel(store.MessageRow{GuildID: "@me", ChannelID: "c2"}))
require.Contains(t, rows[0].Tags, "discord_desktop")
}
// TestParseMessageWindow pins the runtime clock and checks that hour, day, and
// explicit RFC 3339 bounds are converted into the expected since/before pair,
// and that unparsable bounds map to exit code 2.
func TestParseMessageWindow(t *testing.T) {
	// april builds a UTC instant in April 2026 at the top of the given hour.
	april := func(day, hour int) time.Time {
		return time.Date(2026, 4, day, hour, 0, 0, 0, time.UTC)
	}
	rt := &runtime{now: func() time.Time { return april(24, 12) }}

	// Six hours back from the pinned clock.
	from, until, err := rt.parseMessageWindow(6, 0, "", "")
	require.NoError(t, err)
	require.Equal(t, april(24, 6), from)
	require.True(t, until.IsZero())

	// Two days back from the pinned clock.
	from, until, err = rt.parseMessageWindow(0, 2, "", "")
	require.NoError(t, err)
	require.Equal(t, april(22, 12), from)
	require.True(t, until.IsZero())

	// Explicit timestamps on both ends.
	from, until, err = rt.parseMessageWindow(0, 0, "2026-04-20T10:00:00Z", "2026-04-21T10:00:00Z")
	require.NoError(t, err)
	require.Equal(t, april(20, 10), from)
	require.Equal(t, april(21, 10), until)

	// Malformed bounds on either side map to exit code 2.
	_, _, err = rt.parseMessageWindow(0, 0, "bad", "")
	require.Equal(t, 2, ExitCode(err))
	_, _, err = rt.parseMessageWindow(0, 0, "", "bad")
	require.Equal(t, 2, ExitCode(err))
}
// TestWiretapAndSearchWorkWithoutConfig checks that `wiretap --path` and
// `search` both succeed when --config points at a file that was never written.
func TestWiretapAndSearchWorkWithoutConfig(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
home := filepath.Join(dir, "home")
desktopPath := filepath.Join(dir, "discord")
require.NoError(t, os.MkdirAll(filepath.Join(desktopPath, "IndexedDB"), 0o755))
require.NoError(t, os.MkdirAll(home, 0o755))
// Redirect the home directory variables into the temp dir.
// NOTE(review): presumably this keeps default paths out of the real home; confirm against config defaults.
t.Setenv("HOME", home)
t.Setenv("USERPROFILE", home)
// Fixture log: a channel record with one recipient, followed by one message in it.
require.NoError(t, os.WriteFile(filepath.Join(desktopPath, "IndexedDB", "000001.log"), []byte(`{"id":"111111111111111112","type":1,"recipients":[{"id":"222222222222222223","username":"alice","global_name":"Alice"}]}
{"id":"333333333333333334","channel_id":"111111111111111112","content":"local-only DM import","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222223","username":"alice","global_name":"Alice"}}`), 0o600))
// This config file is never created.
cfgPath := filepath.Join(dir, "missing.toml")
var out bytes.Buffer
// The desktop directory is passed on the command line instead of via config.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "wiretap", "--path", desktopPath}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "messages=1")
out.Reset()
// A second invocation with the same missing config finds the imported message.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "local-only"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "local-only DM import")
require.Contains(t, out.String(), "@me")
}
// TestSyncWiretapSourceImportsDesktopMessages checks that `sync --source wiretap`
// imports the message from the fake desktop directory and that `search` can
// find it afterwards.
func TestSyncWiretapSourceImportsDesktopMessages(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "discrawl.db")
desktopPath := filepath.Join(dir, "discord")
require.NoError(t, os.MkdirAll(filepath.Join(desktopPath, "IndexedDB"), 0o755))
// Fixture log: a channel record with one recipient, followed by one message in it.
require.NoError(t, os.WriteFile(filepath.Join(desktopPath, "IndexedDB", "000001.log"), []byte(`{"id":"111111111111111117","type":1,"recipients":[{"id":"222222222222222228","username":"alice","global_name":"Alice"}]}
{"id":"333333333333333339","channel_id":"111111111111111117","content":"sync wiretap import","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222228","username":"alice","global_name":"Alice"}}`), 0o600))
// Config points at the temp DB and fake desktop directory; token source is "none".
cfg := config.Default()
cfg.DBPath = dbPath
cfg.Desktop.Path = desktopPath
cfg.Discord.TokenSource = "none"
require.NoError(t, config.Write(cfgPath, cfg))
var out bytes.Buffer
// The sync summary reports the import under the dm_messages counter.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "sync", "--source", "wiretap"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "dm_messages=1")
out.Reset()
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "sync wiretap"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "sync wiretap import")
require.Contains(t, out.String(), "@me")
}
// TestParseSyncSources checks every accepted --source spelling against the
// canonical name and the discord/wiretap switches it should enable, and that
// an unknown value is rejected.
func TestParseSyncSources(t *testing.T) {
	type expectation struct {
		raw     string
		name    string
		discord bool
		wiretap bool
	}
	accepted := []expectation{
		{raw: "", name: "both", discord: true, wiretap: true},
		{raw: "both", name: "both", discord: true, wiretap: true},
		{raw: "key", name: "discord", discord: true, wiretap: false},
		{raw: "discord", name: "discord", discord: true, wiretap: false},
		{raw: "wiretap", name: "wiretap", discord: false, wiretap: true},
		{raw: "key+wiretap", name: "both", discord: true, wiretap: true},
	}
	for _, want := range accepted {
		got, err := parseSyncSources(want.raw)
		require.NoError(t, err)
		require.Equal(t, want.name, got.name)
		require.Equal(t, want.discord, got.discord)
		require.Equal(t, want.wiretap, got.wiretap)
	}

	// Anything outside the accepted spellings is an error.
	_, err := parseSyncSources("nope")
	require.Error(t, err)
}
func TestReadCommandsAutoImportStaleShare(t *testing.T) {
@ -298,7 +789,7 @@ func TestShareCommandsRoundTripEmbeddings(t *testing.T) {
require.NoError(t, config.Write(cfgPath, cfg))
publisher := seedCLIStore(t, cfg.DBPath)
require.NoError(t, insertCLIEmbedding(ctx, publisher, "m1", "openai_compatible", "local-model", []float32{1, 0}))
require.NoError(t, insertCLIEmbedding(ctx, publisher, "m100", "openai_compatible", "local-model", []float32{1, 0}))
require.NoError(t, publisher.Close())
require.NoError(t, os.MkdirAll(cfg.Share.RepoPath, 0o755))
runGit(t, cfg.Share.RepoPath, "init")
@ -456,7 +947,7 @@ func TestShareUpdateImportsNewRemoteSnapshot(t *testing.T) {
require.Contains(t, out.String(), "newer git snapshot arrived")
}
func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) {
func TestSyncSkipsGitShareByDefaultAndCanImportBeforeLiveDiscord(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
remoteRepo := filepath.Join(dir, "remote.git")
@ -477,6 +968,8 @@ func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) {
cfg.Share.RepoPath = filepath.Join(dir, "reader-share")
cfg.Share.AutoUpdate = true
cfg.Share.StaleAfter = "15m"
cfg.Desktop.Path = filepath.Join(dir, "empty-discord")
require.NoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
require.NoError(t, config.Write(cfgPath, cfg))
hybrid := &hybridSyncService{}
@ -497,21 +990,127 @@ func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) {
}
require.NoError(t, rt.dispatch([]string{"sync", "--all"}))
require.True(t, hybrid.sawGitMessage)
require.False(t, hybrid.sawGitMessage)
reader, err := store.Open(ctx, cfg.DBPath)
require.NoError(t, err)
defer func() { _ = reader.Close() }()
rows, err := reader.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
require.NoError(t, err)
contents := make([]string, 0, len(rows))
for _, row := range rows {
contents = append(contents, row.Content)
}
require.NotContains(t, contents, "automatic updates work")
require.Contains(t, contents, "live discord filled the delta")
require.NoError(t, reader.Close())
hybrid.sawGitMessage = false
require.NoError(t, rt.dispatch([]string{"sync", "--all", "--update=auto"}))
require.True(t, hybrid.sawGitMessage)
reader, err = store.Open(ctx, cfg.DBPath)
require.NoError(t, err)
defer func() { _ = reader.Close() }()
rows, err = reader.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
require.NoError(t, err)
contents = contents[:0]
for _, row := range rows {
contents = append(contents, row.Content)
}
require.Contains(t, contents, "automatic updates work")
require.Contains(t, contents, "live discord filled the delta")
}
// TestSyncLockSerializesConcurrentRuns holds the sync lock and then verifies
// that both withSyncLock and the `update` command give up with
// context.DeadlineExceeded once a short timeout elapses.
func TestSyncLockSerializesConcurrentRuns(t *testing.T) {
if goruntime.GOOS == "windows" {
t.Skip("sync lock is currently a no-op on Windows")
}
ctx := context.Background()
dir := t.TempDir()
cfg := config.Default()
cfg.DBPath = filepath.Join(dir, "discrawl.db")
cfgPath := filepath.Join(dir, "config.toml")
require.NoError(t, config.Write(cfgPath, cfg))
rt := &runtime{
ctx: ctx,
configPath: cfgPath,
cfg: cfg,
}
// Take the lock first so later attempts have to wait.
// NOTE(review): presumably this is the same lock file the runtime derives from cfg.DBPath; confirm.
firstRelease, err := acquireSyncLock(ctx, filepath.Join(dir, ".discrawl-sync.lock"))
require.NoError(t, err)
defer func() { _ = firstRelease() }()
// Attempt 1: call withSyncLock directly under a 25ms deadline.
waitCtx, cancel := context.WithTimeout(ctx, 25*time.Millisecond)
defer cancel()
rt.ctx = waitCtx
err = rt.withSyncLock(func() error { return nil })
require.ErrorIs(t, err, context.DeadlineExceeded)
// Attempt 2: dispatch `update` under a fresh 25ms deadline. Each defer
// captured its own cancel value, so reassigning cancel here is safe.
waitCtx, cancel = context.WithTimeout(ctx, 25*time.Millisecond)
defer cancel()
rt.ctx = waitCtx
err = rt.dispatch([]string{"update"})
require.ErrorIs(t, err, context.DeadlineExceeded)
}
// TestReadCommandsDoNotWaitForSyncLock holds the sync lock and verifies that
// `search`, `messages`, and `sql` still finish within 100ms and print output.
func TestReadCommandsDoNotWaitForSyncLock(t *testing.T) {
if goruntime.GOOS == "windows" {
t.Skip("sync lock timing is flaky on Windows")
}
ctx := context.Background()
dir := t.TempDir()
cfg := config.Default()
cfg.DBPath = filepath.Join(dir, "discrawl.db")
cfgPath := filepath.Join(dir, "config.toml")
require.NoError(t, config.Write(cfgPath, cfg))
// Seed the database and close it again before running the commands.
s := seedCLIStore(t, cfg.DBPath)
require.NoError(t, s.Close())
// Hold the sync lock for the rest of the test.
firstRelease, err := acquireSyncLock(ctx, filepath.Join(dir, ".discrawl-sync.lock"))
require.NoError(t, err)
defer func() { _ = firstRelease() }()
for _, args := range [][]string{
{"--config", cfgPath, "search", "automatic"},
{"--config", cfgPath, "messages", "--channel", "general", "--last", "1"},
{"--config", cfgPath, "sql", "select count(*) as total from messages"},
} {
// Each command gets its own 100ms deadline; cancel is called right after
// Run rather than deferred, so contexts do not pile up across iterations.
runCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
var out bytes.Buffer
err := Run(runCtx, args, &out, &bytes.Buffer{})
cancel()
require.NoError(t, err, args)
require.NotEmpty(t, out.String(), args)
}
}
// TestReadCommandsMigrateOlderLocalStore sets a seeded database's user_version
// to 1, runs `search`, and then checks that user_version reads 2.
func TestReadCommandsMigrateOlderLocalStore(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfg := config.Default()
cfg.DBPath = filepath.Join(dir, "discrawl.db")
cfgPath := filepath.Join(dir, "config.toml")
require.NoError(t, config.Write(cfgPath, cfg))
s := seedCLIStore(t, cfg.DBPath)
// Set user_version back to 1 on the seeded database.
_, err := s.DB().ExecContext(ctx, `pragma user_version = 1`)
require.NoError(t, err)
require.NoError(t, s.Close())
var out bytes.Buffer
// The search command must still work and return the seeded message.
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "automatic"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "automatic updates work")
// Reopen read-only and confirm the version the command left behind.
reader, err := store.OpenReadOnly(ctx, cfg.DBPath)
require.NoError(t, err)
defer func() { _ = reader.Close() }()
var version int
require.NoError(t, reader.DB().QueryRowContext(ctx, `pragma user_version`).Scan(&version))
require.Equal(t, 2, version)
}
func seedCLIStore(t *testing.T, path string) *store.Store {
t.Helper()
ctx := context.Background()
@ -551,7 +1150,7 @@ func publishSnapshot(t *testing.T, ctx context.Context, s *store.Store, opts sha
func runGit(t *testing.T, dir string, args ...string) {
t.Helper()
// #nosec G204 -- fixed git argv in test setup.
cmd := exec.Command("git", args...)
cmd := exec.CommandContext(t.Context(), "git", args...)
cmd.Dir = dir
out, err := cmd.CombinedOutput()
require.NoError(t, err, string(out))
@ -1064,14 +1663,16 @@ func TestRuntimeInitSyncTailAndDoctor(t *testing.T) {
}
rt := newRuntime()
require.NoError(t, rt.runInit([]string{"--db", dbPath, "--with-embeddings", "--guild", "g2", "--account", "atlas"}))
require.NoError(t, rt.runInit([]string{"--db", dbPath, "--with-embeddings", "--guild", "g2"}))
cfg, err := config.Load(cfgPath)
require.NoError(t, err)
require.Equal(t, []string{"g1", "g2"}, cfg.GuildIDs)
require.Equal(t, "g2", cfg.DefaultGuildID)
require.Equal(t, "atlas", cfg.Discord.Account)
require.True(t, cfg.Search.Embeddings.Enabled)
cfg.Desktop.Path = filepath.Join(dir, "empty-discord")
require.NoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
require.NoError(t, config.Write(cfgPath, cfg))
rt = newRuntime()
require.NoError(t, rt.withServices(true, func() error { return rt.runSync([]string{"--guilds", "g2"}) }))
@ -1104,6 +1705,42 @@ func TestRuntimeInitSyncTailAndDoctor(t *testing.T) {
require.Contains(t, out.String(), "discord_auth=ok")
}
// TestSyncModeDefaults checks which flag combinations make defaultLatestSyncMode
// report true, and how that default combines with the explicit latest-only and
// skip-members switches.
func TestSyncModeDefaults(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name           string
		full           bool
		allChannels    bool
		since          string
		channels       string
		defaultLatest  bool
		latestOnly     bool
		skipMembers    bool
		explicitLatest bool
		explicitSkip   bool
	}
	scenarios := []scenario{
		{name: "routine", defaultLatest: true, latestOnly: true, skipMembers: true},
		{name: "all channels", allChannels: true},
		{name: "full", full: true},
		{name: "since", since: "2026-04-27T20:00:00Z"},
		{name: "channels", channels: "c1"},
		{name: "explicit latest", allChannels: true, explicitLatest: true, latestOnly: true},
		{name: "explicit skip members", allChannels: true, explicitSkip: true, skipMembers: true},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			gotDefault := defaultLatestSyncMode(sc.full, sc.allChannels, sc.since, sc.channels)
			require.Equal(t, sc.defaultLatest, gotDefault)
			require.Equal(t, sc.latestOnly, syncLatestOnly(sc.explicitLatest, gotDefault))
			require.Equal(t, sc.skipMembers, syncSkipsMembers(sc.explicitSkip, gotDefault))
		})
	}
}
func TestDoctorChecksEnabledLocalEmbeddingProvider(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
@ -1219,7 +1856,7 @@ func TestRuntimeConfiguresAttachmentTextOnSyncer(t *testing.T) {
require.NoError(t, rt.withServices(true, func() error { return nil }))
require.True(t, fakeSync.attachmentTextEnabled)
cfg.Sync.AttachmentText = ptrBool(false)
cfg.Sync.AttachmentText = new(false)
require.NoError(t, config.Write(cfgPath, cfg))
require.NoError(t, rt.withServices(true, func() error { return nil }))
require.False(t, fakeSync.attachmentTextEnabled)
@ -1292,6 +1929,13 @@ func TestCommandUsageBranches(t *testing.T) {
{[]string{"--config", cfgPath, "sql", "--confirm", "select 1"}, "--confirm requires --unsafe"},
{[]string{"--config", cfgPath, "sql", "--unsafe", "delete from messages"}, "--unsafe requires --confirm"},
{[]string{"--config", cfgPath, "search"}, "search requires a query"},
{[]string{"--config", cfgPath, "search", "--dm", "--guild", "g1", "panic"}, "use either --dm or --guild/--guilds"},
{[]string{"--config", cfgPath, "messages", "--dm", "--guild", "g1"}, "use either --dm or --guild/--guilds"},
{[]string{"--config", cfgPath, "messages", "--dm", "--sync"}, "messages --sync is not supported with --dm"},
{[]string{"--config", cfgPath, "dms", "extra"}, "dms takes flags only"},
{[]string{"--config", cfgPath, "wiretap", "extra"}, "wiretap takes flags only"},
{[]string{"--config", cfgPath, "wiretap", "--max-file-bytes", "0"}, "--max-file-bytes must be positive"},
{[]string{"--config", cfgPath, "wiretap", "--watch-every", "1ms"}, "--watch-every must be at least 1s"},
{[]string{"--config", cfgPath, "members"}, "members requires a subcommand"},
{[]string{"--config", cfgPath, "members", "search"}, "members search requires a query"},
{[]string{"--config", cfgPath, "members", "bogus"}, `unknown members subcommand "bogus"`},
@ -1304,7 +1948,8 @@ func TestCommandUsageBranches(t *testing.T) {
{[]string{"--config", cfgPath, "embed", "--batch-size", "0"}, "--batch-size must be positive"},
{[]string{"--config", cfgPath, "publish", "extra"}, "publish takes no positional arguments"},
{[]string{"--config", cfgPath, "update", "extra"}, "update takes no positional arguments"},
{[]string{"--config", cfgPath, "subscribe", "one", "two"}, "subscribe takes at most one remote"},
{[]string{"--config", cfgPath, "subscribe"}, "subscribe requires one remote"},
{[]string{"--config", cfgPath, "subscribe", "one", "two"}, "subscribe requires one remote"},
}
for _, tc := range cases {
err := Run(ctx, tc.args, &bytes.Buffer{}, &bytes.Buffer{})
@ -1318,11 +1963,29 @@ func TestHelpers(t *testing.T) {
require.Equal(t, []string{"a", "b"}, csvList("a,b,a"))
require.Equal(t, "x", (&cliError{code: 2, err: assertErr("x")}).Error())
mode, err := syncShareUpdateMode([]string{"--all"})
require.NoError(t, err)
require.Equal(t, shareUpdateNever, mode)
mode, err = syncShareUpdateMode([]string{"--update=auto"})
require.NoError(t, err)
require.Equal(t, shareUpdateAuto, mode)
mode, err = syncShareUpdateMode([]string{"--update", "force"})
require.NoError(t, err)
require.Equal(t, shareUpdateForce, mode)
_, err = syncShareUpdateMode([]string{"--update"})
require.Error(t, err)
require.Equal(t, 2, ExitCode(usageErr(assertErr("x"))))
require.Equal(t, 4, ExitCode(authErr(assertErr("x"))))
require.Equal(t, 5, ExitCode(dbErr(assertErr("x"))))
require.Equal(t, 3, ExitCode(configErr(assertErr("x"))))
require.Equal(t, 1, ExitCode(assertErr("x")))
require.True(t, hybridSemanticUnavailable(store.ErrNoCompatibleEmbeddings))
require.True(t, hybridSemanticUnavailable(assertErr("semantic query embedding missing")))
require.False(t, hybridSemanticUnavailable(assertErr("other")))
opts, err := shareOptionsFromFlags("~/share", "git@example.com:org/archive.git", "")
require.NoError(t, err)
require.Equal(t, "git@example.com:org/archive.git", opts.Remote)
require.Equal(t, "main", opts.Branch)
var out bytes.Buffer
require.NoError(t, printHuman(&out, syncer.SyncStats{Guilds: 1}))
require.Contains(t, out.String(), "guilds=1")
@ -1338,10 +2001,6 @@ func discardLogger() *slog.Logger {
return slog.New(slog.DiscardHandler)
}
// ptrBool returns a pointer to a freshly allocated bool holding value.
func ptrBool(value bool) *bool {
	boxed := new(bool)
	*boxed = value
	return boxed
}
func TestRuntimeHelpersAndSubcommands(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
@ -1356,7 +2015,49 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
s, err := store.Open(ctx, dbPath)
require.NoError(t, err)
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "dm1", GuildID: store.DirectMessageGuildID, Kind: "dm", Name: "Alice", RawJSON: `{}`}))
require.NoError(t, s.UpsertMember(ctx, store.MemberRecord{GuildID: "g1", UserID: "u1", Username: "peter", RoleIDsJSON: `[]`, RawJSON: `{}`}))
base := time.Date(2026, 3, 8, 10, 0, 0, 0, time.UTC)
require.NoError(t, s.UpsertMessages(ctx, []store.MessageMutation{
{
Record: store.MessageRecord{
ID: "m1",
GuildID: "g1",
ChannelID: "c1",
ChannelName: "general",
AuthorID: "u1",
AuthorName: "peter",
CreatedAt: base.Format(time.RFC3339Nano),
Content: "hello <@u1> in <#c1>",
NormalizedContent: "hello <@u1> in <#c1>",
RawJSON: `{"author":{"username":"peter"}}`,
},
Mentions: []store.MentionEventRecord{{
MessageID: "m1",
GuildID: "g1",
ChannelID: "c1",
AuthorID: "u1",
TargetType: "user",
TargetID: "u1",
TargetName: "peter",
EventAt: base.Format(time.RFC3339Nano),
}},
},
{
Record: store.MessageRecord{
ID: "dm-msg",
GuildID: store.DirectMessageGuildID,
ChannelID: "dm1",
ChannelName: "Alice",
AuthorID: "u2",
AuthorName: "Alice",
CreatedAt: base.Add(time.Minute).Format(time.RFC3339Nano),
Content: "private hello",
NormalizedContent: "private hello",
RawJSON: `{"source":"discord_desktop"}`,
},
},
}))
require.NoError(t, s.Close())
rt := &runtime{
@ -1376,15 +2077,57 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--hours", "6", "--last", "1"}))
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all"}))
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all", "--include-empty"}))
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--limit", "1"}))
require.NoError(t, rt.runMessages([]string{"--dm", "--channel", "Alice", "--last", "1"}))
require.NoError(t, rt.runDirectMessages([]string{"--list"}))
require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--search", "private", "--limit", "1"}))
require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--all"}))
require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--target", "u2"}))
require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--days", "7", "--type", "user"}))
require.NoError(t, rt.runDigest([]string{"--since", "12h", "--channel", "general", "--top-n", "2"}))
require.NoError(t, rt.runReport([]string{"--readme", filepath.Join(dir, "README.md")}))
require.NoError(t, rt.runSearch([]string{"--include-empty", "Peter"}))
require.NoError(t, rt.runChannels([]string{"show", "c1"}))
require.NoError(t, rt.runChannels([]string{"list"}))
require.NoError(t, rt.runStatus(nil))
require.NoError(t, rt.runAnalytics([]string{}))
require.NoError(t, rt.runTUI([]string{"--json", "--limit", "1", "--include-empty"}))
require.NoError(t, rt.runAnalytics([]string{"quiet", "--since", "1d"}))
require.NoError(t, rt.runAnalytics([]string{"trends", "--weeks", "1", "--channel", "general"}))
return nil
}))
}
// TestRunInitWritesDiscoveredGuildConfig runs `init` against a fake syncer
// that discovers two guilds and checks the config file written as a result.
func TestRunInitWritesDiscoveredGuildConfig(t *testing.T) {
	workDir := t.TempDir()
	var (
		configFile = filepath.Join(workDir, "config.toml")
		dbFile     = filepath.Join(workDir, "discrawl.db")
	)
	t.Setenv(config.DefaultTokenEnv, "env-token")

	// Keep a typed handle on stdout so the output can be inspected directly.
	stdout := &bytes.Buffer{}
	syncStub := &fakeSyncService{discovered: []*discordgo.UserGuild{{ID: "g1"}, {ID: "g2"}}}
	rt := &runtime{
		ctx:        context.Background(),
		configPath: configFile,
		stdout:     stdout,
		stderr:     &bytes.Buffer{},
		logger:     discardLogger(),
		newDiscord: func(config.Config) (discordClient, error) {
			return &fakeDiscordClient{}, nil
		},
		newSyncer: func(syncer.Client, *store.Store, *slog.Logger) syncService {
			return syncStub
		},
	}

	require.NoError(t, rt.runInit([]string{"--db", dbFile, "--guild", "g2", "--with-embeddings"}))

	// Both discovered guilds are persisted; the --guild flag picks the default.
	written, err := config.Load(configFile)
	require.NoError(t, err)
	require.Equal(t, dbFile, written.DBPath)
	require.Equal(t, []string{"g1", "g2"}, written.GuildIDs)
	require.Equal(t, "g2", written.DefaultGuildID)
	require.True(t, written.Search.Embeddings.Enabled)
	require.Contains(t, stdout.String(), "g2")
}
func TestRunMembersShowUsesDefaultGuildForAmbiguousQuery(t *testing.T) {
t.Parallel()
@ -1559,6 +2302,17 @@ func TestRunMentionsValidation(t *testing.T) {
rt := &runtime{stderr: &bytes.Buffer{}}
rt.now = func() time.Time { return time.Date(2026, 3, 8, 12, 0, 0, 0, time.UTC) }
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"extra"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "-1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--days", "-1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "1", "--days", "1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "1", "--since", "2026-03-01T00:00:00Z"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--limit", "-1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--last", "-1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--all", "--last", "1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--limit", "1", "--last", "1"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--since", "bad"})))
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--before", "bad"})))
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "-1", "--channel", "general"})))
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "1", "--days", "1", "--channel", "general"})))
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "1", "--since", "2026-03-01T00:00:00Z", "--channel", "general"})))
@ -1657,6 +2411,8 @@ func TestCommandUsageErrors(t *testing.T) {
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--days", "-1"})))
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--days", "1", "--since", "2026-03-01T00:00:00Z"})))
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--all", "--guild", "g1"})))
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--update", "bogus"})))
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--update=force", "--no-update"})))
require.Equal(t, 2, ExitCode(rt.runChannels(nil)))
require.Equal(t, 2, ExitCode(rt.runStatus([]string{"extra"})))
require.NoError(t, (&runtime{stdout: &bytes.Buffer{}}).runDoctor(nil))

View File

@ -0,0 +1,96 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"os"
"time"
"github.com/openclaw/crawlkit/control"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
// runMetadata implements the `metadata` command: it prints a control manifest
// describing discrawl's default paths, capabilities, privacy flags, and the
// argv for each advertised command. It accepts only flags; --json switches the
// runtime to JSON output before printing.
func (r *runtime) runMetadata(args []string) error {
	var wantJSON bool
	flags := flag.NewFlagSet("metadata", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	flags.BoolVar(&wantJSON, "json", false, "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() > 0 {
		return usageErr(errors.New("metadata takes flags only"))
	}
	if wantJSON {
		r.json = true
	}

	// mutating describes a state-changing command invoked with the global --json flag.
	mutating := func(title, subcommand string) control.Command {
		return control.Command{
			Title:   title,
			Argv:    []string{"discrawl", "--json", subcommand},
			JSON:    true,
			Mutates: true,
		}
	}
	// wiretap is the same kind of command, additionally flagged as legacy and deprecated.
	legacyImport := mutating("Legacy desktop cache import", "wiretap")
	legacyImport.Legacy = true
	legacyImport.Deprecated = true

	// Paths are reported from the built-in defaults, not from the loaded config.
	defaults := config.Default()

	manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl")
	manifest.Description = "Local-first Discord archive crawler."
	manifest.Branding = control.Branding{
		SymbolName:       "bubble.left.and.bubble.right.fill",
		AccentColor:      "#5865f2",
		BundleIdentifier: "com.hnc.Discord",
	}
	manifest.Paths = control.Paths{
		DefaultConfig:   config.ResolvePath(""),
		ConfigEnv:       config.DefaultConfigEnv,
		DefaultDatabase: defaults.DBPath,
		DefaultCache:    defaults.CacheDir,
		DefaultLogs:     defaults.LogDir,
		DefaultShare:    defaults.Share.RepoPath,
	}
	manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"}
	manifest.Privacy = control.Privacy{
		ContainsPrivateMessages: true,
		ExportsSecrets:          false,
		LocalOnlyScopes:         []string{"discord", "desktop-cache", "sqlite", "git-share"},
	}
	manifest.Commands = map[string]control.Command{
		"status":       {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true},
		"doctor":       {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true},
		"sync":         mutating("Sync", "sync"),
		"tap":          mutating("Import desktop cache", "tap"),
		"cache-import": mutating("Import desktop cache", "cache-import"),
		"wiretap":      legacyImport,
		"tui":          {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}},
		"tui-json":     {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true},
		"publish":      mutating("Publish share", "publish"),
		"subscribe":    mutating("Subscribe share", "subscribe"),
		"update":       mutating("Update share", "update"),
	}
	return r.print(manifest)
}
// controlStatus converts a store.Status snapshot plus the active configuration
// into the control.Status payload: headline summary, per-entity counts,
// database and WAL sizes, last sync time (when set), and share settings.
func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status {
	headline := fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount)
	tallies := []control.Count{
		control.NewCount("guilds", "Guilds", int64(status.GuildCount)),
		control.NewCount("channels", "Channels", int64(status.ChannelCount)),
		control.NewCount("threads", "Threads", int64(status.ThreadCount)),
		control.NewCount("messages", "Messages", int64(status.MessageCount)),
		control.NewCount("members", "Members", int64(status.MemberCount)),
		control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)),
	}

	result := control.NewStatus("discrawl", headline)
	result.State = "current"
	result.ConfigPath = configPath
	result.DatabasePath = status.DBPath
	result.Counts = tallies
	// The last sync time is only reported once one has been recorded.
	if last := status.LastSyncAt; !last.IsZero() {
		result.LastSyncAt = last.UTC().Format(time.RFC3339)
	}

	// The primary database entry shares the same counts as the top-level status.
	primary := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, tallies)
	result.DatabaseBytes = primary.Bytes
	// Size of the "-wal" file next to the database; fileSize yields 0 if it cannot be stat'ed.
	result.WALBytes = fileSize(status.DBPath + "-wal")
	result.Databases = []control.Database{primary}

	result.Share = &control.Share{
		Enabled:     cfg.ShareEnabled(),
		RepoPath:    cfg.Share.RepoPath,
		Remote:      cfg.Share.Remote,
		Branch:      cfg.Share.Branch,
		NeedsUpdate: shareNeedsUpdate,
	}
	return result
}
// fileSize reports the size in bytes of the file at path. Any os.Stat failure
// (for example a path that does not exist) is reported as 0 rather than as an
// error.
func fileSize(path string) int64 {
	if info, err := os.Stat(path); err == nil {
		return info.Size()
	}
	return 0
}

73
internal/cli/digest.go Normal file
View File

@ -0,0 +1,73 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"strconv"
"strings"
"time"
"github.com/openclaw/discrawl/internal/report"
)
// runDigest implements the `digest` command. It parses --since (default "7d"),
// --guild, --channel, and --top-n (default 3), falls back to the configured
// default guild when --guild is blank, builds the digest from the store, and
// prints it. Flag and --since parsing problems are returned as usage errors.
func (r *runtime) runDigest(args []string) error {
	var (
		sinceRaw   string
		guildRaw   string
		channelRaw string
		topCount   int
	)
	flags := flag.NewFlagSet("digest", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	flags.StringVar(&sinceRaw, "since", "7d", "")
	flags.StringVar(&guildRaw, "guild", "", "")
	flags.StringVar(&channelRaw, "channel", "", "")
	flags.IntVar(&topCount, "top-n", 3, "")
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}
	if flags.NArg() > 0 {
		return usageErr(errors.New("digest takes no positional arguments"))
	}

	window, err := parseLookback(sinceRaw)
	if err != nil {
		return usageErr(fmt.Errorf("parse --since: %w", err))
	}

	opts := report.DigestOptions{
		Since:   window,
		GuildID: strings.TrimSpace(guildRaw),
		Channel: channelRaw,
		TopN:    topCount,
	}
	// A blank --guild means "use the configured default guild".
	if opts.GuildID == "" {
		opts.GuildID = r.cfg.EffectiveDefaultGuildID()
	}

	result, err := report.BuildDigest(r.ctx, r.store, opts)
	if err != nil {
		return err
	}
	return r.print(result)
}
// parseLookback converts a user-supplied lookback window into a duration.
//
// After trimming surrounding whitespace, two forms are accepted:
//   - a whole number of days with a "d" suffix, such as "7d";
//   - anything time.ParseDuration understands, such as "72h" or "30m".
//
// It returns an error for empty input, malformed input, negative values, and
// day counts too large to be represented as a time.Duration.
func parseLookback(value string) (time.Duration, error) {
	const day = 24 * time.Hour
	// Largest day count whose duration still fits in time.Duration (int64
	// nanoseconds). Beyond this the multiplication below would wrap silently
	// and could even produce a negative duration with a nil error.
	const maxDays = int64(1<<63-1) / int64(day)

	value = strings.TrimSpace(value)
	if value == "" {
		return 0, errors.New("empty duration")
	}

	// time.ParseDuration has no day unit, so the "d" suffix is handled here.
	if daysValue, ok := strings.CutSuffix(value, "d"); ok {
		days, err := strconv.Atoi(daysValue)
		if err != nil {
			return 0, fmt.Errorf("invalid day count: %w", err)
		}
		if days < 0 {
			return 0, errors.New("negative duration")
		}
		if int64(days) > maxDays {
			return 0, errors.New("duration too large")
		}
		return time.Duration(days) * day, nil
	}

	d, err := time.ParseDuration(value)
	if err != nil {
		return 0, err
	}
	if d < 0 {
		return 0, errors.New("negative duration")
	}
	return d, nil
}

168
internal/cli/digest_test.go Normal file
View File

@ -0,0 +1,168 @@
package cli
import (
"bytes"
"context"
"encoding/json"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
// TestParseLookback exercises parseLookback with day-suffixed values, plain
// Go durations, and inputs that must be rejected (empty, malformed, negative).
func TestParseLookback(t *testing.T) {
	type lookbackCase struct {
		input   string
		want    time.Duration
		wantErr bool
	}
	table := []lookbackCase{
		{input: "7d", want: 7 * 24 * time.Hour},
		{input: "30d", want: 30 * 24 * time.Hour},
		{input: "72h", want: 72 * time.Hour},
		{input: "30m", want: 30 * time.Minute},
		{input: "", wantErr: true},
		{input: "abc", wantErr: true},
		{input: "-2d", wantErr: true},
		{input: "-1h", wantErr: true},
	}
	for i := range table {
		tc := table[i]
		got, err := parseLookback(tc.input)
		if !tc.wantErr {
			require.NoError(t, err, tc.input)
			require.Equal(t, tc.want, got, tc.input)
			continue
		}
		require.Error(t, err, tc.input)
	}
}
// TestDigestCommand runs the "digest" subcommand end to end through Run
// against a temporary database seeded by seedDigestCLIStore and a config file
// whose default guild is "g1".
func TestDigestCommand(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.toml")
dbPath := filepath.Join(dir, "discrawl.db")
require.NoError(t, seedDigestCLIStore(ctx, dbPath))
cfg := config.Default()
cfg.DBPath = dbPath
cfg.DefaultGuildID = "g1"
require.NoError(t, config.Write(cfgPath, cfg))
// Human-readable output: expects a per-channel heading plus the window and totals lines.
t.Run("since 7d happy path", func(t *testing.T) {
var out bytes.Buffer
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "digest", "--since", "7d"}, &out, &bytes.Buffer{}))
require.Contains(t, out.String(), "general (text)")
require.Contains(t, out.String(), "Window:")
require.Contains(t, out.String(), "Totals: messages=")
})
// JSON output: numbers decode as float64 in map[string]any, hence InEpsilon.
// The seed inserts two messages, which is the expected totals.messages value.
t.Run("json output", func(t *testing.T) {
var out bytes.Buffer
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "digest", "--since", "7d"}, &out, &bytes.Buffer{}))
var payload map[string]any
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
require.Equal(t, "7d", payload["window_label"])
require.InEpsilon(t, 3, payload["top_n"], 0.001)
totals, ok := payload["totals"].(map[string]any)
require.True(t, ok)
require.InEpsilon(t, 2, totals["messages"], 0.001)
require.Contains(t, totals, "replies")
require.NotContains(t, totals, "threads")
})
// --channel given as a channel name must narrow the digest to that one channel.
t.Run("channel name filter", func(t *testing.T) {
var out bytes.Buffer
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "digest", "--channel", "incidents", "--since", "7d"}, &out, &bytes.Buffer{}))
var payload map[string]any
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
channels, ok := payload["channels"].([]any)
require.True(t, ok)
require.Len(t, channels, 1)
channel := channels[0].(map[string]any)
require.Equal(t, "incidents", channel["channel_name"])
})
// An undefined flag must fail with exit code 2.
t.Run("unknown flag fails", func(t *testing.T) {
err := Run(ctx, []string{"--config", cfgPath, "digest", "--bogus"}, &bytes.Buffer{}, &bytes.Buffer{})
require.Error(t, err)
require.Equal(t, 2, ExitCode(err))
})
// A stray positional argument must fail with exit code 2 as well.
t.Run("no positional args allowed", func(t *testing.T) {
err := Run(ctx, []string{"--config", cfgPath, "digest", "extra"}, &bytes.Buffer{}, &bytes.Buffer{})
require.Error(t, err)
require.Equal(t, 2, ExitCode(err))
})
}
// seedDigestCLIStore opens the store at path and fills it with a small fixture
// for the digest CLI tests: guild "g1", text channels "general" (c1) and
// "incidents" (c2), members alice (u1) and bob (u2), and two recent messages.
// Message m1 in "general" carries a user mention of bob; m2 is posted in
// "incidents". The store is closed before returning.
func seedDigestCLIStore(ctx context.Context, path string) error {
	s, err := store.Open(ctx, path)
	if err != nil {
		return err
	}
	defer func() { _ = s.Close() }()

	// Timestamps are relative to now so the messages always fall inside the
	// lookback windows used by the tests.
	now := time.Now().UTC()
	stamp := func(age time.Duration) string {
		return now.Add(-age).Format(time.RFC3339Nano)
	}
	helloAt := stamp(2 * time.Hour)
	incidentAt := stamp(90 * time.Minute)

	if err := s.UpsertGuild(ctx, store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}); err != nil {
		return err
	}

	channels := []store.ChannelRecord{
		{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`},
		{ID: "c2", GuildID: "g1", Kind: "text", Name: "incidents", RawJSON: `{}`},
	}
	for _, channel := range channels {
		if err := s.UpsertChannel(ctx, channel); err != nil {
			return err
		}
	}

	members := []store.MemberRecord{
		{GuildID: "g1", UserID: "u1", Username: "alice", DisplayName: "Alice", RoleIDsJSON: `[]`, RawJSON: `{}`},
		{GuildID: "g1", UserID: "u2", Username: "bob", DisplayName: "Bob", RoleIDsJSON: `[]`, RawJSON: `{}`},
	}
	for _, member := range members {
		if err := s.UpsertMember(ctx, member); err != nil {
			return err
		}
	}

	hello := store.MessageMutation{
		Record: store.MessageRecord{
			ID:                "m1",
			GuildID:           "g1",
			ChannelID:         "c1",
			ChannelName:       "general",
			AuthorID:          "u1",
			AuthorName:        "Alice",
			MessageType:       0,
			CreatedAt:         helloAt,
			Content:           "hello",
			NormalizedContent: "hello",
			RawJSON:           `{}`,
		},
		Mentions: []store.MentionEventRecord{{
			MessageID:  "m1",
			GuildID:    "g1",
			ChannelID:  "c1",
			AuthorID:   "u1",
			TargetType: "user",
			TargetID:   "u2",
			TargetName: "Bob",
			EventAt:    helloAt,
		}},
	}
	incident := store.MessageMutation{
		Record: store.MessageRecord{
			ID:                "m2",
			GuildID:           "g1",
			ChannelID:         "c2",
			ChannelName:       "incidents",
			AuthorID:          "u2",
			AuthorName:        "Bob",
			MessageType:       0,
			CreatedAt:         incidentAt,
			Content:           "incident",
			NormalizedContent: "incident",
			RawJSON:           `{}`,
		},
	}
	return s.UpsertMessages(ctx, []store.MessageMutation{hello, incident})
}

View File

@ -0,0 +1,146 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"strings"
"time"
"github.com/openclaw/discrawl/internal/store"
)
const defaultDMLast = 50
// runDirectMessages implements the "dms" subcommand. Depending on the flags it
// does one of three things, always scoped to the direct-message guild:
//
//   - lists conversations, when --list is set or when no --with, --search or
//     time filter is given;
//   - runs a message search, when --search is non-blank;
//   - lists messages otherwise, honouring --since/--before style windows and
//     the --all / --limit / --last selection.
//
// Invalid flag combinations are reported as usage errors.
func (r *runtime) runDirectMessages(args []string) error {
	flags := flag.NewFlagSet("dms", flag.ContinueOnError)
	flags.SetOutput(io.Discard)
	var (
		withFlag     = flags.String("with", "", "")
		searchFlag   = flags.String("search", "", "")
		hoursFlag    = flags.Int("hours", 0, "")
		daysFlag     = flags.Int("days", 0, "")
		sinceFlag    = flags.String("since", "", "")
		beforeFlag   = flags.String("before", "", "")
		limitFlag    = flags.Int("limit", defaultDMLast, "")
		lastFlag     = flags.Int("last", defaultDMLast, "")
		allFlag      = flags.Bool("all", false, "")
		listFlag     = flags.Bool("list", false, "")
		includeEmpty = flags.Bool("include-empty", false, "")
	)
	if err := flags.Parse(args); err != nil {
		return usageErr(err)
	}

	// Cases are evaluated top to bottom, so the first violated rule decides
	// which usage error the caller sees.
	switch {
	case flags.NArg() != 0:
		return usageErr(errors.New("dms takes flags only"))
	case *hoursFlag < 0:
		return usageErr(errors.New("--hours must be >= 0"))
	case *daysFlag < 0:
		return usageErr(errors.New("--days must be >= 0"))
	case countNonZero(*hoursFlag > 0, *daysFlag > 0, strings.TrimSpace(*sinceFlag) != "") > 1:
		return usageErr(errors.New("use only one of --hours, --days, or --since"))
	case *limitFlag < 0:
		return usageErr(errors.New("--limit must be >= 0"))
	case *lastFlag < 0:
		return usageErr(errors.New("--last must be >= 0"))
	case *allFlag && *lastFlag > 0 && flagPassed(flags, "last"):
		return usageErr(errors.New("use either --all or --last"))
	case flagPassed(flags, "limit") && flagPassed(flags, "last"):
		return usageErr(errors.New("use either --limit or --last"))
	}

	query := strings.TrimSpace(*searchFlag)
	unfiltered := strings.TrimSpace(*withFlag) == "" &&
		query == "" &&
		noDMMessageTimeFilter(*hoursFlag, *daysFlag, *sinceFlag, *beforeFlag)
	if *listFlag || unfiltered {
		conversations, err := r.store.DirectMessageConversations(r.ctx, store.DirectMessageConversationOptions{With: *withFlag})
		if err != nil {
			return err
		}
		return r.print(conversations)
	}

	sinceTime, beforeTime, err := r.parseMessageWindow(*hoursFlag, *daysFlag, *sinceFlag, *beforeFlag)
	if err != nil {
		return err
	}

	if query != "" {
		hits, err := r.store.SearchMessages(r.ctx, store.SearchOptions{
			Query:        query,
			GuildIDs:     []string{store.DirectMessageGuildID},
			Channel:      *withFlag,
			Limit:        *limitFlag,
			IncludeEmpty: *includeEmpty,
		})
		if err != nil {
			return err
		}
		return r.print(hits)
	}

	// --all disables both bounds; an explicit --limit wins over the --last
	// default; otherwise only --last applies.
	var maxRows, tailRows int
	if !*allFlag {
		if flagPassed(flags, "limit") {
			maxRows = *limitFlag
		} else {
			tailRows = *lastFlag
		}
	}

	messages, err := r.store.ListMessages(r.ctx, store.MessageListOptions{
		GuildIDs:     []string{store.DirectMessageGuildID},
		Channel:      *withFlag,
		Since:        sinceTime,
		Before:       beforeTime,
		Limit:        maxRows,
		Last:         tailRows,
		IncludeEmpty: *includeEmpty,
	})
	if err != nil {
		return err
	}
	return r.print(messages)
}
// parseMessageWindow turns the --hours/--days/--since/--before flag values
// into absolute time bounds.
//
// The lower bound is derived from hours or days relative to the current time
// (r.now when set, time.Now otherwise). days takes precedence over hours, and
// a non-blank since (RFC 3339) overrides both. A non-blank before (RFC 3339)
// sets the upper bound. Bounds that are not requested are returned as the
// zero time.
//
// since and before are trimmed before parsing so that the emptiness check and
// the parser see the same text; previously a whitespace-padded timestamp
// passed the check and then failed to parse.
//
// A malformed timestamp is returned as a usage error with both bounds zero.
func (r *runtime) parseMessageWindow(hours, days int, since, before string) (time.Time, time.Time, error) {
	var sinceTime, beforeTime time.Time

	if hours > 0 || days > 0 {
		// Resolve "now" once so both relative flags share the same reference.
		now := time.Now().UTC()
		if r.now != nil {
			now = r.now().UTC()
		}
		if hours > 0 {
			sinceTime = now.Add(-time.Duration(hours) * time.Hour)
		}
		if days > 0 {
			sinceTime = now.Add(-time.Duration(days) * 24 * time.Hour)
		}
	}

	if raw := strings.TrimSpace(since); raw != "" {
		parsed, err := time.Parse(time.RFC3339, raw)
		if err != nil {
			return time.Time{}, time.Time{}, usageErr(fmt.Errorf("invalid --since: %w", err))
		}
		sinceTime = parsed
	}
	if raw := strings.TrimSpace(before); raw != "" {
		parsed, err := time.Parse(time.RFC3339, raw)
		if err != nil {
			return time.Time{}, time.Time{}, usageErr(fmt.Errorf("invalid --before: %w", err))
		}
		beforeTime = parsed
	}
	return sinceTime, beforeTime, nil
}
// noDMMessageTimeFilter reports whether none of the time-window flags were
// supplied: hours and days are both zero and since/before are blank after
// trimming whitespace.
func noDMMessageTimeFilter(hours, days int, since, before string) bool {
	if hours != 0 || days != 0 {
		return false
	}
	hasBound := strings.TrimSpace(since) != "" || strings.TrimSpace(before) != ""
	return !hasBound
}

View File

@ -18,7 +18,7 @@ func TestDockerGitSourceSmoke(t *testing.T) {
t.Skip("docker is not installed")
}
root := repoRoot(t)
cmd := exec.Command("bash", filepath.Join(root, "scripts", "docker-git-source-smoke.sh"))
cmd := exec.CommandContext(t.Context(), "bash", filepath.Join(root, "scripts", "docker-git-source-smoke.sh"))
cmd.Dir = root
out, err := cmd.CombinedOutput()
require.NoError(t, err, string(out))
@ -26,7 +26,7 @@ func TestDockerGitSourceSmoke(t *testing.T) {
func repoRoot(t *testing.T) string {
t.Helper()
cmd := exec.Command("git", "rev-parse", "--show-toplevel")
cmd := exec.CommandContext(t.Context(), "git", "rev-parse", "--show-toplevel")
out, err := cmd.Output()
require.NoError(t, err)
return strings.TrimSpace(string(out))

View File

@ -1,10 +1,12 @@
package cli
import (
"errors"
"flag"
"fmt"
"strings"
"time"
"github.com/openclaw/discrawl/internal/store"
)
func (r *runtime) resolveSyncGuilds(guild, guilds string) []string {
@ -24,7 +26,7 @@ func (r *runtime) resolveSyncGuildsAll(guild, guilds string, all bool) ([]string
return r.resolveSyncGuilds(guild, guilds), nil
}
if len(csvList(guilds)) > 0 || strings.TrimSpace(guild) != "" {
return nil, fmt.Errorf("use either --all or --guild/--guilds")
return nil, errors.New("use either --all or --guild/--guilds")
}
return nil, nil
}
@ -34,6 +36,17 @@ func (r *runtime) resolveSearchGuilds(guild, guilds string) []string {
return csvList(strings.Join(requested, ","))
}
// directMessageGuildScope resolves the guild IDs a query should cover.
//
// With dm set, the scope is exactly the direct-message guild, and combining it
// with --guild or --guilds is an error. Without dm, the result is the entries
// of guilds followed by the trimmed guild value, re-normalised through csvList.
func directMessageGuildScope(dm bool, guild, guilds string) ([]string, error) {
	single := strings.TrimSpace(guild)
	listed := csvList(guilds)
	if dm {
		if single != "" || len(listed) > 0 {
			return nil, errors.New("use either --dm or --guild/--guilds")
		}
		return []string{store.DirectMessageGuildID}, nil
	}
	combined := strings.Join(append(listed, single), ",")
	return csvList(combined), nil
}
func csvList(raw string) []string {
if raw == "" {
return nil

View File

@ -1,13 +1,14 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"strings"
"time"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/store"
)
func (r *runtime) runMentions(args []string) error {
@ -27,19 +28,19 @@ func (r *runtime) runMentions(args []string) error {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("mentions takes flags only"))
return usageErr(errors.New("mentions takes flags only"))
}
if *days < 0 {
return usageErr(fmt.Errorf("--days must be >= 0"))
return usageErr(errors.New("--days must be >= 0"))
}
if *days > 0 && strings.TrimSpace(*since) != "" {
return usageErr(fmt.Errorf("use either --days or --since"))
return usageErr(errors.New("use either --days or --since"))
}
if *limit < 0 {
return usageErr(fmt.Errorf("--limit must be >= 0"))
return usageErr(errors.New("--limit must be >= 0"))
}
if targetTypeValue := strings.TrimSpace(*targetType); targetTypeValue != "" && targetTypeValue != "user" && targetTypeValue != "role" {
return usageErr(fmt.Errorf("--type must be user or role"))
return usageErr(errors.New("--type must be user or role"))
}
var sinceTime time.Time
@ -73,7 +74,7 @@ func (r *runtime) runMentions(args []string) error {
sinceTime.IsZero() &&
beforeTime.IsZero() &&
len(guildIDs) == 0 {
return usageErr(fmt.Errorf("mentions needs at least one filter"))
return usageErr(errors.New("mentions needs at least one filter"))
}
rows, err := r.store.ListMentions(r.ctx, store.MentionListOptions{

View File

@ -1,13 +1,14 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"strings"
"time"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/store"
)
const defaultMessageLimit = 200
@ -26,35 +27,36 @@ func (r *runtime) runMessages(args []string) error {
all := fs.Bool("all", false, "")
syncNow := fs.Bool("sync", false, "")
includeEmpty := fs.Bool("include-empty", false, "")
dm := fs.Bool("dm", false, "")
guildsFlag := fs.String("guilds", "", "")
guildFlag := fs.String("guild", "", "")
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("messages takes flags only"))
return usageErr(errors.New("messages takes flags only"))
}
if *hours < 0 {
return usageErr(fmt.Errorf("--hours must be >= 0"))
return usageErr(errors.New("--hours must be >= 0"))
}
if *days < 0 {
return usageErr(fmt.Errorf("--days must be >= 0"))
return usageErr(errors.New("--days must be >= 0"))
}
if countNonZero(*hours > 0, *days > 0, strings.TrimSpace(*since) != "") > 1 {
return usageErr(fmt.Errorf("use only one of --hours, --days, or --since"))
return usageErr(errors.New("use only one of --hours, --days, or --since"))
}
if *limit < 0 {
return usageErr(fmt.Errorf("--limit must be >= 0"))
return usageErr(errors.New("--limit must be >= 0"))
}
if *last < 0 {
return usageErr(fmt.Errorf("--last must be >= 0"))
return usageErr(errors.New("--last must be >= 0"))
}
limitSet := flagPassed(fs, "limit")
if *all && *last > 0 {
return usageErr(fmt.Errorf("use either --all or --last"))
return usageErr(errors.New("use either --all or --last"))
}
if limitSet && *last > 0 {
return usageErr(fmt.Errorf("use either --limit or --last"))
return usageErr(errors.New("use either --limit or --last"))
}
if *last > 0 {
*limit = 0
@ -90,9 +92,15 @@ func (r *runtime) runMessages(args []string) error {
}
}
guildIDs := r.resolveSearchGuilds(*guildFlag, *guildsFlag)
guildIDs, err := directMessageGuildScope(*dm, *guildFlag, *guildsFlag)
if err != nil {
return usageErr(err)
}
if *dm && *syncNow {
return usageErr(errors.New("messages --sync is not supported with --dm; run wiretap or sync --source wiretap first"))
}
if strings.TrimSpace(*channel) == "" && strings.TrimSpace(*author) == "" && sinceTime.IsZero() && beforeTime.IsZero() && len(guildIDs) == 0 {
return usageErr(fmt.Errorf("messages needs at least one filter"))
return usageErr(errors.New("messages needs at least one filter"))
}
if *all {
*limit = 0

View File

@ -2,15 +2,19 @@ package cli
import (
"encoding/json"
"errors"
"fmt"
"io"
"sort"
"strconv"
"strings"
"text/tabwriter"
"time"
"github.com/steipete/discrawl/internal/store"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/discrawl/internal/discorddesktop"
"github.com/openclaw/discrawl/internal/report"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
func (r *runtime) print(value any) error {
@ -57,13 +61,35 @@ func printPlain(w io.Writer, value any) error {
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.MessageID, row.Content)
}
return nil
case []store.DirectMessageConversationRow:
for _, row := range v {
_, _ = fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\t%s\n", row.ChannelID, row.Name, row.MessageCount, row.AuthorCount, formatTime(row.FirstMessageAt), formatTime(row.LastMessageAt))
}
return nil
case []store.MentionRow:
for _, row := range v {
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.TargetType, row.TargetID, row.Content)
}
return nil
case report.Digest:
for _, row := range v.Channels {
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%d\t%d\t%d\n", row.ChannelID, row.ChannelName, row.Kind, row.GuildID, row.Messages, row.Replies, row.ActiveAuthors)
}
return nil
case report.Quiet:
for _, row := range v.Channels {
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\n", row.ChannelID, row.ChannelName, row.Kind, row.GuildID, row.LastMessage, row.DaysSilent)
}
return nil
case report.Trends:
for _, row := range v.Rows {
for _, week := range row.Weekly {
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\n", row.GuildID, row.ChannelID, row.ChannelName, row.Kind, formatTime(week.WeekStart), week.Messages)
}
}
return nil
default:
return fmt.Errorf("no plain printer")
return errors.New("no plain printer")
}
}
@ -74,11 +100,20 @@ Usage:
discrawl [global flags] <command> [args]
Commands:
metadata
version
init
sync
tail
tap
cache-import
wiretap
search
tui
messages
digest
analytics
dms
mentions
embed
sql
@ -101,9 +136,30 @@ func printRows(w io.Writer, cols []string, rows [][]string) error {
func printHuman(w io.Writer, value any) error {
switch v := value.(type) {
case syncRunStats:
if _, err := fmt.Fprintf(w, "source=%s\n", v.Source); err != nil {
return err
}
if v.Discord != nil {
if _, err := fmt.Fprintf(w, "discord_guilds=%d\ndiscord_channels=%d\ndiscord_threads=%d\ndiscord_members=%d\ndiscord_messages=%d\n",
v.Discord.Guilds, v.Discord.Channels, v.Discord.Threads, v.Discord.Members, v.Discord.Messages); err != nil {
return err
}
}
if v.Wiretap != nil {
if _, err := fmt.Fprintf(w, "wiretap_visited=%d\nwiretap_files=%d\nwiretap_unchanged=%d\nwiretap_fast_skipped=%d\nwiretap_messages=%d\nwiretap_dm_messages=%d\nwiretap_dm_channels=%d\nwiretap_guild_messages=%d\nwiretap_skipped_messages=%d\nwiretap_skipped_channels=%d\nwiretap_checkpoints=%d\n",
v.Wiretap.FilesVisited, v.Wiretap.FilesScanned, v.Wiretap.FilesUnchanged, v.Wiretap.CacheFilesFastSkipped, v.Wiretap.Messages, v.Wiretap.DMMessages, v.Wiretap.DMChannels, v.Wiretap.GuildMessages, v.Wiretap.SkippedMessages, v.Wiretap.SkippedChannels, v.Wiretap.Checkpoints); err != nil {
return err
}
}
return nil
case syncer.SyncStats:
_, err := fmt.Fprintf(w, "guilds=%d channels=%d threads=%d members=%d messages=%d\n", v.Guilds, v.Channels, v.Threads, v.Members, v.Messages)
return err
case discorddesktop.Stats:
_, err := fmt.Fprintf(w, "path=%s\nvisited=%d\nfiles=%d\nskipped=%d\nunchanged=%d\nfast_skipped=%d\nobjects=%d\nguilds=%d\nchannels=%d\nmessages=%d\ndm_messages=%d\ndm_channels=%d\nguild_messages=%d\nskipped_messages=%d\nskipped_channels=%d\ncheckpoints=%d\nfull_cache=%t\ndry_run=%t\n",
v.Path, v.FilesVisited, v.FilesScanned, v.FilesSkipped, v.FilesUnchanged, v.CacheFilesFastSkipped, v.JSONObjects, v.Guilds, v.Channels, v.Messages, v.DMMessages, v.DMChannels, v.GuildMessages, v.SkippedMessages, v.SkippedChannels, v.Checkpoints, v.FullCache, v.DryRun)
return err
case store.Status:
_, err := fmt.Fprintf(w, "db=%s\nguilds=%d\nchannels=%d\nthreads=%d\nmessages=%d\nmembers=%d\nembedding_backlog=%d\nlast_sync=%s\nlast_tail_event=%s\n",
v.DBPath, v.GuildCount, v.ChannelCount, v.ThreadCount, v.MessageCount, v.MemberCount, v.EmbeddingBacklog,
@ -138,6 +194,20 @@ func printHuman(w io.Writer, value any) error {
}
}
return nil
case []store.DirectMessageConversationRow:
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
_, _ = fmt.Fprintln(tw, "CHANNEL\tNAME\tMESSAGES\tAUTHORS\tFIRST\tLAST")
for _, row := range v {
_, _ = fmt.Fprintf(tw, "%s\t%s\t%d\t%d\t%s\t%s\n",
row.ChannelID,
row.Name,
row.MessageCount,
row.AuthorCount,
formatTime(row.FirstMessageAt),
formatTime(row.LastMessageAt),
)
}
return tw.Flush()
case []store.MentionRow:
for _, row := range v {
if _, err := fmt.Fprintf(w, "[%s/%s] %s -> %s:%s %s\n%s\n\n", row.GuildID, row.ChannelName, row.AuthorName, row.TargetType, firstNonEmpty(row.TargetName, row.TargetID), formatTime(row.CreatedAt), row.Content); err != nil {
@ -230,6 +300,74 @@ func printHuman(w io.Writer, value any) error {
_, _ = fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n", row.GuildID, row.ID, row.Kind, row.Name)
}
return tw.Flush()
case report.Digest:
for _, channel := range v.Channels {
if _, err := fmt.Fprintf(w, "%s (%s)\n", channel.ChannelName, firstNonEmpty(channel.Kind, "unknown")); err != nil {
return err
}
if _, err := fmt.Fprintf(w, " messages=%d replies=%d authors=%d\n", channel.Messages, channel.Replies, channel.ActiveAuthors); err != nil {
return err
}
if _, err := fmt.Fprintf(w, " top posters %s\n", formatRankedCounts(channel.TopPosters)); err != nil {
return err
}
if _, err := fmt.Fprintf(w, " top mentions %s\n\n", formatRankedCounts(channel.TopMentions)); err != nil {
return err
}
}
if _, err := fmt.Fprintf(w, "Window: %s to %s (%s)\n", formatTime(v.Since), formatTime(v.Until), v.WindowLabel); err != nil {
return err
}
_, err := fmt.Fprintf(w, "Totals: messages=%d replies=%d channels=%d authors=%d\n", v.Totals.Messages, v.Totals.Replies, v.Totals.Channels, v.Totals.ActiveAuthors)
return err
case report.Quiet:
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
_, _ = fmt.Fprintln(tw, "CHANNEL\tKIND\tLAST MESSAGE\tDAYS SILENT")
for _, row := range v.Channels {
_, _ = fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n",
row.ChannelName,
firstNonEmpty(row.Kind, "unknown"),
firstNonEmpty(row.LastMessage, "never"),
formatDaysSilent(row.DaysSilent),
)
}
if err := tw.Flush(); err != nil {
return err
}
if _, err := fmt.Fprintf(w, "\nWindow: %s to %s (%s)\n", formatTime(v.Since), formatTime(v.Until), formatWindowDuration(v.Until.Sub(v.Since))); err != nil {
return err
}
_, err := fmt.Fprintf(w, "Totals: channels=%d\n", v.Totals.Channels)
return err
case report.Trends:
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
header := []string{"CHANNEL", "KIND", "TOTAL"}
weekStarts := make([]time.Time, 0, v.Weeks)
if len(v.Rows) > 0 {
for _, week := range v.Rows[0].Weekly {
weekStarts = append(weekStarts, week.WeekStart)
}
} else {
for i := range v.Weeks {
weekStarts = append(weekStarts, v.Since.AddDate(0, 0, 7*i))
}
}
for _, start := range weekStarts {
header = append(header, start.Format(time.DateOnly))
}
_, _ = fmt.Fprintln(tw, strings.Join(header, "\t"))
for _, row := range v.Rows {
cols := []string{row.ChannelName, firstNonEmpty(row.Kind, "unknown"), strconv.Itoa(trendsRowTotal(row.Weekly))}
for _, week := range row.Weekly {
cols = append(cols, strconv.Itoa(week.Messages))
}
_, _ = fmt.Fprintln(tw, strings.Join(cols, "\t"))
}
if err := tw.Flush(); err != nil {
return err
}
_, err := fmt.Fprintf(w, "\nWindow: %s to %s (%d weeks)\n", formatTime(v.Since), formatTime(v.Until), v.Weeks)
return err
case map[string]any:
keys := make([]string, 0, len(v))
for key := range v {
@ -243,7 +381,7 @@ func printHuman(w io.Writer, value any) error {
}
return nil
default:
return fmt.Errorf("no human printer")
return errors.New("no human printer")
}
}
@ -278,3 +416,42 @@ func trimForTable(value string) string {
}
return value[:37] + "..."
}
// formatRankedCounts renders ranked entries as "name (count)" items separated
// by ", ". Entries without a name are shown as "unknown"; an empty list is
// rendered as "-".
func formatRankedCounts(rows []report.RankedCount) string {
	if len(rows) == 0 {
		return "-"
	}
	var b strings.Builder
	for i, row := range rows {
		if i > 0 {
			b.WriteString(", ")
		}
		fmt.Fprintf(&b, "%s (%d)", firstNonEmpty(row.Name, "unknown"), row.Count)
	}
	return b.String()
}
// formatDaysSilent renders a days-silent count as decimal text; negative
// values are rendered as "-".
func formatDaysSilent(days int) string {
	if days >= 0 {
		return strconv.Itoa(days)
	}
	return "-"
}
// formatWindowDuration renders a window length compactly: "0" for zero or
// negative durations, "<n>d" for whole days, "<n>h" for whole hours, and the
// standard time.Duration string otherwise.
func formatWindowDuration(d time.Duration) string {
	const day = 24 * time.Hour
	switch {
	case d <= 0:
		return "0"
	case d%day == 0:
		return strconv.Itoa(int(d/day)) + "d"
	case d%time.Hour == 0:
		return strconv.Itoa(int(d/time.Hour)) + "h"
	default:
		return d.String()
	}
}
// trendsRowTotal sums the Messages counts across all weekly buckets of a row.
func trendsRowTotal(weekly []report.WeeklyCount) int {
	var sum int
	for i := range weekly {
		sum += weekly[i].Messages
	}
	return sum
}

View File

@ -7,8 +7,8 @@ import (
"github.com/stretchr/testify/require"
"github.com/steipete/discrawl/internal/store"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/syncer"
)
func TestPrintRows(t *testing.T) {

View File

@ -9,9 +9,9 @@ import (
"os"
"strings"
"github.com/steipete/discrawl/internal/config"
"github.com/steipete/discrawl/internal/embed"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/crawlkit/embed"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
func (r *runtime) runSearch(args []string) error {
@ -22,17 +22,22 @@ func (r *runtime) runSearch(args []string) error {
author := fs.String("author", "", "")
limit := fs.Int("limit", 20, "")
includeEmpty := fs.Bool("include-empty", false, "")
dm := fs.Bool("dm", false, "")
guildsFlag := fs.String("guilds", "", "")
guildFlag := fs.String("guild", "", "")
if err := fs.Parse(args); err != nil {
if err := fs.Parse(permuteSearchFlags(args)); err != nil {
return usageErr(err)
}
if fs.NArg() != 1 {
return usageErr(fmt.Errorf("search requires a query"))
return usageErr(errors.New("search requires a query"))
}
guildIDs, err := directMessageGuildScope(*dm, *guildFlag, *guildsFlag)
if err != nil {
return usageErr(err)
}
opts := store.SearchOptions{
Query: fs.Arg(0),
GuildIDs: r.resolveSearchGuilds(*guildFlag, *guildsFlag),
GuildIDs: guildIDs,
Channel: *channel,
Author: *author,
Limit: *limit,
@ -62,6 +67,51 @@ func (r *runtime) runSearch(args []string) error {
}
}
// permuteSearchFlags reorders search arguments so that every recognised flag
// precedes the positional arguments. The standard flag package stops parsing
// at the first non-flag argument, so without this step flags written after
// the query (e.g. `search hello --limit 5`) would be ignored.
//
// Recognised flags are the value flags --mode, --channel, --author, --limit,
// --guilds and --guild (as "--name value" or "--name=value") and the boolean
// flags --include-empty and --dm. Anything else, including a value flag that
// is the last argument and so has no value, is treated as positional and left
// for the flag parser to judge.
//
// A "--" terminator ends flag scanning: everything after it is positional.
// When nothing positional precedes it the terminator is kept in the output,
// so a dash-prefixed query such as `search -- -foo` is not re-interpreted as
// a flag by the subsequent parse. When a positional already precedes it, the
// flag parser stops at that argument anyway and the terminator is dropped.
func permuteSearchFlags(args []string) []string {
	takesValue := func(name string) bool {
		switch name {
		case "--mode", "--channel", "--author", "--limit", "--guilds", "--guild":
			return true
		}
		return false
	}
	isBool := func(name string) bool {
		return name == "--include-empty" || name == "--dm"
	}

	flags := make([]string, 0, len(args))
	positionals := make([]string, 0, len(args))
	for i := 0; i < len(args); i++ {
		arg := args[i]
		if arg == "--" {
			if len(positionals) == 0 {
				// Keep the terminator so the flag parser still treats what
				// follows as positional.
				flags = append(flags, arg)
			}
			positionals = append(positionals, args[i+1:]...)
			break
		}
		if name, _, ok := strings.Cut(arg, "="); ok && (takesValue(name) || isBool(name)) {
			flags = append(flags, arg)
			continue
		}
		if isBool(arg) {
			flags = append(flags, arg)
			continue
		}
		if takesValue(arg) && i+1 < len(args) {
			// Move the flag together with its value and skip the value.
			flags = append(flags, arg, args[i+1])
			i++
			continue
		}
		positionals = append(positionals, arg)
	}
	return append(flags, positionals...)
}
func (r *runtime) searchMessagesSemantic(opts store.SearchOptions) ([]store.SearchResult, error) {
semanticOpts, err := r.semanticSearchOptions(opts)
if err != nil {
@ -102,12 +152,12 @@ func (r *runtime) searchMessagesHybrid(opts store.SearchOptions) ([]store.Search
func (r *runtime) semanticSearchOptions(opts store.SearchOptions) (store.SemanticSearchOptions, error) {
if !r.cfg.Search.Embeddings.Enabled {
return store.SemanticSearchOptions{}, fmt.Errorf("embeddings are disabled; enable [search.embeddings] first")
return store.SemanticSearchOptions{}, errors.New("embeddings are disabled; enable [search.embeddings] first")
}
providerFactory := r.newEmbed
if providerFactory == nil {
providerFactory = func(cfg config.EmbeddingsConfig) (embed.Provider, error) {
return embed.NewProvider(cfg)
return embed.NewProvider(crawlkitEmbeddingConfig(cfg))
}
}
provider, err := providerFactory(r.cfg.Search.Embeddings)
@ -153,7 +203,7 @@ func (r *runtime) runSQL(args []string) error {
return usageErr(err)
}
if *confirm && !*unsafe {
return usageErr(fmt.Errorf("--confirm requires --unsafe"))
return usageErr(errors.New("--confirm requires --unsafe"))
}
var query string
@ -179,7 +229,7 @@ func (r *runtime) runSQL(args []string) error {
return printRows(r.stdout, cols, rows)
}
if !*confirm {
return usageErr(fmt.Errorf("--unsafe requires --confirm"))
return usageErr(errors.New("--unsafe requires --confirm"))
}
if store.IsReadOnlySQL(query) {
@ -202,7 +252,7 @@ func (r *runtime) runSQL(args []string) error {
func (r *runtime) runMembers(args []string) error {
if len(args) == 0 {
return usageErr(fmt.Errorf("members requires a subcommand"))
return usageErr(errors.New("members requires a subcommand"))
}
switch args[0] {
case "list":
@ -215,7 +265,7 @@ func (r *runtime) runMembers(args []string) error {
return r.runMembersShow(args[1:])
case "search":
if len(args) < 2 {
return usageErr(fmt.Errorf("members search requires a query"))
return usageErr(errors.New("members search requires a query"))
}
rows, err := r.store.Members(r.ctx, "", strings.Join(args[1:], " "), 100)
if err != nil {
@ -235,7 +285,7 @@ func (r *runtime) runMembersShow(args []string) error {
return usageErr(err)
}
if fs.NArg() < 1 {
return usageErr(fmt.Errorf("members show requires a user id or query"))
return usageErr(errors.New("members show requires a user id or query"))
}
query := strings.Join(fs.Args(), " ")
@ -277,7 +327,7 @@ func (r *runtime) runMembersShow(args []string) error {
func (r *runtime) runChannels(args []string) error {
if len(args) == 0 {
return usageErr(fmt.Errorf("channels requires a subcommand"))
return usageErr(errors.New("channels requires a subcommand"))
}
rows, err := r.store.Channels(r.ctx, "")
if err != nil {
@ -288,7 +338,7 @@ func (r *runtime) runChannels(args []string) error {
return r.print(rows)
case "show":
if len(args) < 2 {
return usageErr(fmt.Errorf("channels show requires a channel id"))
return usageErr(errors.New("channels show requires a channel id"))
}
filtered := make([]store.ChannelRow, 0, 1)
for _, row := range rows {

View File

@ -1,16 +1,17 @@
package cli
import (
"errors"
"fmt"
"slices"
"strings"
"github.com/steipete/discrawl/internal/syncer"
"github.com/openclaw/discrawl/internal/syncer"
)
func (r *runtime) syncMessagesQuery(channel, guild, guilds string) error {
if r.syncer == nil {
return usageErr(fmt.Errorf("messages --sync requires Discord access"))
return usageErr(errors.New("messages --sync requires Discord access"))
}
opts, err := r.messageSyncOptions(channel, guild, guilds)
if err != nil {
@ -30,7 +31,7 @@ func (r *runtime) messageSyncOptions(channel, guild, guilds string) (syncer.Sync
channelFilter := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(channel), "#"))
if channelFilter == "" {
if len(opts.GuildIDs) == 0 {
return opts, fmt.Errorf("messages --sync needs --channel or --guild")
return opts, errors.New("messages --sync needs --channel or --guild")
}
return opts, nil
}
@ -95,3 +96,27 @@ func hasBoolFlag(args []string, name string) bool {
}
return false
}
// boolFlagEnabled reports whether args switches the boolean flag name on,
// either as the bare flag or as "name=value" with a truthy value. Truthy
// values are 1, t, true, y, yes and on, compared case-insensitively after
// trimming whitespace. The first enabling occurrence decides.
func boolFlagEnabled(args []string, name string) bool {
	assignment := name + "="
	for _, arg := range args {
		if arg == name {
			return true
		}
		value, found := strings.CutPrefix(arg, assignment)
		if !found {
			continue
		}
		switch strings.ToLower(strings.TrimSpace(value)) {
		case "1", "t", "true", "y", "yes", "on":
			return true
		}
	}
	return false
}
// hasHelpArg reports whether any argument is exactly "help", "--help" or "-h".
func hasHelpArg(args []string) bool {
	return slices.ContainsFunc(args, func(arg string) bool {
		switch arg {
		case "help", "--help", "-h":
			return true
		default:
			return false
		}
	})
}

View File

@ -8,8 +8,8 @@ import (
"github.com/stretchr/testify/require"
"github.com/steipete/discrawl/internal/config"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/store"
)
func TestMessageSyncOptionsNumericChannelID(t *testing.T) {

View File

@ -1,11 +1,11 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"github.com/steipete/discrawl/internal/report"
"github.com/openclaw/discrawl/internal/report"
)
func (r *runtime) runReport(args []string) error {
@ -16,7 +16,7 @@ func (r *runtime) runReport(args []string) error {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("report takes no positional arguments"))
return usageErr(errors.New("report takes no positional arguments"))
}
activity, err := report.Build(r.ctx, r.store, report.Options{})
if err != nil {

View File

@ -1,19 +1,17 @@
package cli
import (
"errors"
"flag"
"fmt"
"io"
"os"
"github.com/steipete/discrawl/internal/config"
"github.com/steipete/discrawl/internal/report"
"github.com/steipete/discrawl/internal/share"
"github.com/steipete/discrawl/internal/store"
"github.com/openclaw/discrawl/internal/config"
"github.com/openclaw/discrawl/internal/report"
"github.com/openclaw/discrawl/internal/share"
"github.com/openclaw/discrawl/internal/store"
)
const defaultShareRemote = "https://github.com/openclaw/discord-backup.git"
func (r *runtime) runPublish(args []string) error {
fs := flag.NewFlagSet("publish", flag.ContinueOnError)
fs.SetOutput(io.Discard)
@ -29,7 +27,7 @@ func (r *runtime) runPublish(args []string) error {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("publish takes no positional arguments"))
return usageErr(errors.New("publish takes no positional arguments"))
}
opts, err := shareOptionsFromFlags(*repoPath, *remote, *branch)
if err != nil {
@ -98,13 +96,10 @@ func (r *runtime) runSubscribe(args []string) error {
if err := fs.Parse(args); err != nil {
return usageErr(err)
}
remote := defaultShareRemote
if fs.NArg() > 1 {
return usageErr(fmt.Errorf("subscribe takes at most one remote"))
}
if fs.NArg() == 1 {
remote = fs.Arg(0)
if fs.NArg() != 1 {
return usageErr(errors.New("subscribe requires one remote"))
}
remote := fs.Arg(0)
cfg, err := loadConfigOrDefault(r.configPath)
if err != nil {
return err
@ -130,34 +125,39 @@ func (r *runtime) runSubscribe(args []string) error {
if err != nil {
return configErr(err)
}
s, err := store.Open(r.ctx, dbPath)
if err != nil {
return dbErr(err)
}
defer func() { _ = s.Close() }()
expandedRepo, err := config.ExpandPath(cfg.Share.RepoPath)
if err != nil {
return configErr(err)
}
opts := share.Options{RepoPath: expandedRepo, Remote: cfg.Share.Remote, Branch: cfg.Share.Branch}
if *withEmbeddings {
applyEmbeddingShareOptions(&opts, cfg)
}
if err := share.Pull(r.ctx, opts); err != nil {
return err
}
manifest, imported, err := share.ImportIfChanged(r.ctx, s, opts)
if err != nil {
return err
}
return r.print(map[string]any{
"config_path": r.configPath,
"repo_path": opts.RepoPath,
"remote": opts.Remote,
"generated_at": manifest.GeneratedAt,
"tables": manifest.Tables,
"embeddings": manifest.Embeddings,
"imported": imported,
r.cfg = cfg
return r.withSyncLock(func() error {
s, err := store.Open(r.ctx, dbPath)
if err != nil {
return dbErr(err)
}
defer func() { _ = s.Close() }()
expandedRepo, err := config.ExpandPath(cfg.Share.RepoPath)
if err != nil {
return configErr(err)
}
opts := share.Options{RepoPath: expandedRepo, Remote: cfg.Share.Remote, Branch: cfg.Share.Branch, Progress: r.shareProgress}
if *withEmbeddings {
applyEmbeddingShareOptions(&opts, cfg)
}
r.setSyncLockPhase("share pull")
if err := share.Pull(r.ctx, opts); err != nil {
return err
}
r.setSyncLockPhase("share import")
manifest, imported, err := share.ImportIfChanged(r.ctx, s, opts)
if err != nil {
return err
}
return r.print(map[string]any{
"config_path": r.configPath,
"repo_path": opts.RepoPath,
"remote": opts.Remote,
"generated_at": manifest.GeneratedAt,
"tables": manifest.Tables,
"embeddings": manifest.Embeddings,
"imported": imported,
})
})
}
@ -172,18 +172,21 @@ func (r *runtime) runUpdate(args []string) error {
return usageErr(err)
}
if fs.NArg() != 0 {
return usageErr(fmt.Errorf("update takes no positional arguments"))
return usageErr(errors.New("update takes no positional arguments"))
}
opts, err := shareOptionsFromFlags(*repoPath, *remote, *branch)
if err != nil {
return err
}
opts.Progress = r.shareProgress
if *withEmbeddings {
applyEmbeddingShareOptions(&opts, r.cfg)
}
r.setSyncLockPhase("share pull")
if err := share.Pull(r.ctx, opts); err != nil {
return err
}
r.setSyncLockPhase("share import")
manifest, imported, err := share.ImportIfChanged(r.ctx, r.store, opts)
if err != nil {
return err
@ -204,7 +207,7 @@ func shareOptionsFromFlags(repoPath, remote, branch string) (share.Options, erro
return share.Options{}, configErr(err)
}
if remote == "" {
remote = defaultShareRemote
return share.Options{}, configErr(errors.New("share remote is required"))
}
if branch == "" {
branch = "main"

View File

@ -0,0 +1,110 @@
package cli
import (
"errors"
"fmt"
"strings"
"time"
"github.com/openclaw/discrawl/internal/share"
)
// shareUpdateMode is the parsed form of the --update / --no-update flags.
type shareUpdateMode string

const (
	// shareUpdateConfigured is only produced by boolShareUpdateMode(true);
	// parseShareUpdateMode does not accept it as flag input.
	shareUpdateConfigured shareUpdateMode = "configured"
	shareUpdateAuto       shareUpdateMode = "auto"
	shareUpdateNever      shareUpdateMode = "never"
	shareUpdateForce      shareUpdateMode = "force"
)

// boolShareUpdateMode maps a boolean switch onto a mode: true yields
// shareUpdateConfigured and false yields shareUpdateNever.
func boolShareUpdateMode(enabled bool) shareUpdateMode {
	if enabled {
		return shareUpdateConfigured
	}
	return shareUpdateNever
}

// parseShareUpdateMode parses the value of --update. Matching ignores case
// and surrounding whitespace; an empty value means shareUpdateAuto. Any other
// value than auto, never, or force is an error.
func parseShareUpdateMode(raw string) (shareUpdateMode, error) {
	switch shareUpdateMode(strings.ToLower(strings.TrimSpace(raw))) {
	case "", shareUpdateAuto:
		return shareUpdateAuto, nil
	case shareUpdateNever:
		return shareUpdateNever, nil
	case shareUpdateForce:
		return shareUpdateForce, nil
	default:
		return "", fmt.Errorf("invalid --update %q; use auto, never, or force", raw)
	}
}

// syncShareUpdateMode scans args for --no-update, "--update VALUE", and
// "--update=VALUE" and returns the resulting mode. Without any of these flags
// the result is shareUpdateNever. When --update is repeated the last value
// wins.
//
// Combining --no-update with an --update value other than "never" is an
// error regardless of the order in which the flags appear. The requested
// --update value is tracked separately from --no-update so that a trailing
// --no-update cannot mask an earlier conflicting --update.
func syncShareUpdateMode(args []string) (shareUpdateMode, error) {
	var (
		requested   = shareUpdateNever // value of the last --update flag seen
		sawNoUpdate bool
		sawUpdate   bool
	)
	for i := 0; i < len(args); i++ {
		arg := args[i]
		switch {
		case arg == "--no-update":
			sawNoUpdate = true
		case arg == "--update":
			if i+1 >= len(args) {
				return "", errors.New("--update requires auto, never, or force")
			}
			parsed, err := parseShareUpdateMode(args[i+1])
			if err != nil {
				return "", err
			}
			sawUpdate = true
			requested = parsed
			i++ // the next argument was consumed as the flag value
		case strings.HasPrefix(arg, "--update="):
			parsed, err := parseShareUpdateMode(strings.TrimPrefix(arg, "--update="))
			if err != nil {
				return "", err
			}
			sawUpdate = true
			requested = parsed
		}
	}
	if sawNoUpdate {
		if sawUpdate && requested != shareUpdateNever {
			return "", errors.New("use either --no-update or --update, not both")
		}
		return shareUpdateNever, nil
	}
	return requested, nil
}
// shareProgress receives share import progress events. Events without a
// phase are ignored. Otherwise the event is written to the sync lock file as
// "share <phase> [table] [file]" and logged with structured attributes.
func (r *runtime) shareProgress(progress share.ImportProgress) {
	if progress.Phase == "" {
		return
	}
	hasTable := progress.Table != ""
	hasFile := progress.File != ""

	// Lock-file phase: space-separated, optional parts only when present.
	words := []string{"share", progress.Phase}
	if hasTable {
		words = append(words, progress.Table)
	}
	if hasFile {
		words = append(words, progress.File)
	}
	r.setSyncLockPhase(strings.Join(words, " "))

	// Log attributes: zero-valued counters and empty names are left out.
	fields := []any{"phase", progress.Phase}
	if hasTable {
		fields = append(fields, "table", progress.Table)
	}
	if progress.Rows != 0 {
		fields = append(fields, "rows", progress.Rows)
	}
	if progress.TotalRows != 0 {
		fields = append(fields, "total_rows", progress.TotalRows)
	}
	if hasFile {
		fields = append(fields,
			"file", progress.File,
			"file_index", progress.FileIndex,
			"file_count", progress.FileCount,
		)
	}
	r.logger.Info("share import progress", fields...)
}
// nowUTC returns the current time in UTC, using the runtime's now hook when
// one is set and time.Now otherwise.
func (r *runtime) nowUTC() time.Time {
	if r.now == nil {
		return time.Now().UTC()
	}
	return r.now().UTC()
}

100
internal/cli/sync_lock.go Normal file
View File

@ -0,0 +1,100 @@
package cli
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/openclaw/discrawl/internal/config"
)
// withSyncLock runs fn while holding the sync lock file. If this runtime
// already holds the lock, fn runs directly. Otherwise the lock is acquired
// via acquireSyncLock with r.ctx, the phase is set to "locked", and the lock
// is released (release errors ignored) after fn returns.
func (r *runtime) withSyncLock(fn func() error) error {
	if r.dbLockHeld {
		return fn()
	}
	path, pathErr := r.syncLockPath()
	if pathErr != nil {
		return pathErr
	}
	unlock, lockErr := acquireSyncLock(r.ctx, path)
	if lockErr != nil {
		return lockErr
	}
	r.dbLockHeld, r.lockStarted = true, r.nowUTC()
	r.setSyncLockPhase("locked")
	defer func() {
		r.dbLockHeld, r.lockStarted = false, time.Time{}
		_ = unlock()
	}()
	return fn()
}
// tryWithSyncLock is the non-waiting form of withSyncLock. It makes a single
// tryAcquireSyncLock attempt; the boolean result reports whether fn was run
// under the lock. If this runtime already holds the lock, fn runs directly
// and the result is true.
func (r *runtime) tryWithSyncLock(fn func() error) (bool, error) {
	if r.dbLockHeld {
		return true, fn()
	}
	path, pathErr := r.syncLockPath()
	if pathErr != nil {
		return false, pathErr
	}
	unlock, acquired, lockErr := tryAcquireSyncLock(path)
	if lockErr != nil {
		return acquired, lockErr
	}
	if !acquired {
		return acquired, lockErr
	}
	r.dbLockHeld, r.lockStarted = true, r.nowUTC()
	r.setSyncLockPhase("locked")
	defer func() {
		r.dbLockHeld, r.lockStarted = false, time.Time{}
		_ = unlock()
	}()
	return true, fn()
}
// setSyncLockPhase rewrites the sync lock file with the pid, lock start
// time, current time, and the given phase. It does nothing unless this
// runtime holds the lock. Path and write errors are ignored.
func (r *runtime) setSyncLockPhase(phase string) {
	if !r.dbLockHeld {
		return
	}
	lockPath, err := r.syncLockPath()
	if err != nil {
		return
	}
	startedAt := r.lockStarted
	if startedAt.IsZero() {
		// No recorded start time: fall back to the current time.
		startedAt = r.nowUTC()
	}
	pid := os.Getpid()
	startedText := startedAt.Format(time.RFC3339Nano)
	updatedText := r.nowUTC().Format(time.RFC3339Nano)
	contents := fmt.Sprintf("pid=%d\nstarted_at=%s\nupdated_at=%s\nphase=%s\n", pid, startedText, updatedText, phase)
	_ = os.WriteFile(lockPath, []byte(contents), 0o600)
}
// syncLockPath returns the path of ".discrawl-sync.lock" in the directory
// that contains the configured database file. Path expansion failures are
// wrapped with configErr.
func (r *runtime) syncLockPath() (string, error) {
	expanded, err := config.ExpandPath(r.cfg.DBPath)
	if err != nil {
		return "", configErr(err)
	}
	dbDir := filepath.Dir(expanded)
	return filepath.Join(dbDir, ".discrawl-sync.lock"), nil
}
// syncLockErr builds the error returned when waiting for the sync lock ends
// because ctx is done. It returns nil while ctx is still live. If the lock
// file at path is readable and non-blank, its contents are included with
// newlines replaced by ", ". The context error is wrapped with %w.
func syncLockErr(ctx context.Context, path string) error {
	cause := ctx.Err()
	if cause == nil {
		return nil
	}
	raw, readErr := os.ReadFile(path)
	if readErr == nil {
		if holder := strings.TrimSpace(string(raw)); holder != "" {
			oneLine := strings.ReplaceAll(holder, "\n", ", ")
			return fmt.Errorf("wait for sync lock %s (%s): %w", path, oneLine, cause)
		}
	}
	return fmt.Errorf("wait for sync lock %s: %w", path, cause)
}

View File

@ -0,0 +1,13 @@
//go:build !unix && !windows
package cli
import "context"
// acquireSyncLock is the fallback for platforms that are neither unix nor
// windows: it performs no locking and returns a release function that does
// nothing.
func acquireSyncLock(_ context.Context, _ string) (func() error, error) {
	release := func() error { return nil }
	return release, nil
}
// tryAcquireSyncLock is the fallback for platforms that are neither unix nor
// windows: it always reports the lock as acquired and returns a release
// function that does nothing.
func tryAcquireSyncLock(_ string) (func() error, bool, error) {
	release := func() error { return nil }
	return release, true, nil
}

View File

@ -0,0 +1,79 @@
//go:build unix
package cli
import (
"context"
"errors"
"fmt"
"os"
"time"
"golang.org/x/sys/unix"
)
// acquireSyncLock opens (creating if needed) the lock file at path and polls
// every 200ms for an exclusive, non-blocking flock until it succeeds or ctx
// is done. On success the file is rewritten with "pid=<pid>" and the returned
// function unlocks and closes it. Errors other than EWOULDBLOCK/EAGAIN abort
// immediately.
func acquireSyncLock(ctx context.Context, path string) (func() error, error) {
	lockFile, openErr := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
	if openErr != nil {
		return nil, fmt.Errorf("open sync lock: %w", openErr)
	}
	retry := time.NewTicker(200 * time.Millisecond)
	defer retry.Stop()
	for {
		flockErr := unix.Flock(int(lockFile.Fd()), unix.LOCK_EX|unix.LOCK_NB)
		switch {
		case flockErr == nil:
			// Lock held: replace any stale contents with our pid.
			_, _ = lockFile.Seek(0, 0)
			_ = lockFile.Truncate(0)
			_, _ = fmt.Fprintf(lockFile, "pid=%d\n", os.Getpid())
			release := func() error {
				unlockErr := unix.Flock(int(lockFile.Fd()), unix.LOCK_UN)
				closeErr := lockFile.Close()
				if unlockErr != nil {
					return unlockErr
				}
				return closeErr
			}
			return release, nil
		case !errors.Is(flockErr, unix.EWOULDBLOCK) && !errors.Is(flockErr, unix.EAGAIN):
			_ = lockFile.Close()
			return nil, fmt.Errorf("acquire sync lock: %w", flockErr)
		}
		// Lock is held elsewhere: wait for the next tick or cancellation.
		select {
		case <-ctx.Done():
			waitErr := syncLockErr(ctx, path)
			_ = lockFile.Close()
			return nil, waitErr
		case <-retry.C:
		}
	}
}
// tryAcquireSyncLock makes one non-blocking attempt to flock the lock file
// at path. It returns (nil, false, nil) when the lock is held elsewhere
// (EWOULDBLOCK/EAGAIN) and an error for any other failure. On success the
// file is rewritten with "pid=<pid>" and the returned function unlocks and
// closes it.
func tryAcquireSyncLock(path string) (func() error, bool, error) {
	lockFile, openErr := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
	if openErr != nil {
		return nil, false, fmt.Errorf("open sync lock: %w", openErr)
	}
	switch flockErr := unix.Flock(int(lockFile.Fd()), unix.LOCK_EX|unix.LOCK_NB); {
	case flockErr == nil:
		// Acquired; fall through to initialise the file below.
	case errors.Is(flockErr, unix.EWOULDBLOCK), errors.Is(flockErr, unix.EAGAIN):
		_ = lockFile.Close()
		return nil, false, nil
	default:
		_ = lockFile.Close()
		return nil, false, fmt.Errorf("acquire sync lock: %w", flockErr)
	}
	_, _ = lockFile.Seek(0, 0)
	_ = lockFile.Truncate(0)
	_, _ = fmt.Fprintf(lockFile, "pid=%d\n", os.Getpid())
	release := func() error {
		unlockErr := unix.Flock(int(lockFile.Fd()), unix.LOCK_UN)
		closeErr := lockFile.Close()
		if unlockErr != nil {
			return unlockErr
		}
		return closeErr
	}
	return release, true, nil
}

View File

@ -0,0 +1,76 @@
//go:build windows
package cli
import (
	"context"
	"errors"
	"fmt"
	"os"
	"time"

	"golang.org/x/sys/windows"
)
// acquireSyncLock opens (creating if needed) the lock file at path and polls
// every 200ms for an exclusive LockFileEx lock on its first byte until it
// succeeds or ctx is done. On success the file is rewritten with "pid=<pid>"
// and the returned function unlocks and closes it.
//
// Only lock contention (ERROR_LOCK_VIOLATION, or ERROR_IO_PENDING) is
// retried. Any other LockFileEx failure is returned immediately, matching the
// unix implementation; retrying it would spin until ctx ends, or forever if
// it never does.
func acquireSyncLock(ctx context.Context, path string) (func() error, error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
	if err != nil {
		return nil, fmt.Errorf("open sync lock: %w", err)
	}
	locked := false
	defer func() {
		// Every failure path leaves locked false, so the handle is closed here.
		if !locked {
			_ = file.Close()
		}
	}()
	ticker := time.NewTicker(200 * time.Millisecond)
	defer ticker.Stop()
	handle := windows.Handle(file.Fd())
	overlapped := &windows.Overlapped{}
	for {
		err = windows.LockFileEx(handle, windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, 0, 1, 0, overlapped)
		if err == nil {
			locked = true
			_, _ = file.Seek(0, 0)
			_ = file.Truncate(0)
			_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
			return func() error {
				unlockErr := windows.UnlockFileEx(handle, 0, 1, 0, overlapped)
				closeErr := file.Close()
				if unlockErr != nil {
					return unlockErr
				}
				return closeErr
			}, nil
		}
		if !errors.Is(err, windows.ERROR_LOCK_VIOLATION) && !errors.Is(err, windows.ERROR_IO_PENDING) {
			return nil, fmt.Errorf("acquire sync lock: %w", err)
		}
		select {
		case <-ctx.Done():
			return nil, syncLockErr(ctx, path)
		case <-ticker.C:
		}
	}
}
// tryAcquireSyncLock makes one non-blocking LockFileEx attempt on the first
// byte of the lock file at path. It returns (nil, false, nil) only when the
// lock is held elsewhere (ERROR_LOCK_VIOLATION, or ERROR_IO_PENDING). Any
// other failure is returned as an error instead of being reported as "busy",
// matching the unix implementation. On success the file is rewritten with
// "pid=<pid>" and the returned function unlocks and closes it.
func tryAcquireSyncLock(path string) (func() error, bool, error) {
	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
	if err != nil {
		return nil, false, fmt.Errorf("open sync lock: %w", err)
	}
	handle := windows.Handle(file.Fd())
	overlapped := &windows.Overlapped{}
	err = windows.LockFileEx(handle, windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, 0, 1, 0, overlapped)
	if err != nil {
		_ = file.Close()
		if errors.Is(err, windows.ERROR_LOCK_VIOLATION) || errors.Is(err, windows.ERROR_IO_PENDING) {
			return nil, false, nil
		}
		return nil, false, fmt.Errorf("acquire sync lock: %w", err)
	}
	_, _ = file.Seek(0, 0)
	_ = file.Truncate(0)
	_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
	return func() error {
		unlockErr := windows.UnlockFileEx(handle, 0, 1, 0, overlapped)
		closeErr := file.Close()
		if unlockErr != nil {
			return unlockErr
		}
		return closeErr
	}, true, nil
}

View File

@ -0,0 +1,239 @@
package cli
import (
"context"
"errors"
"flag"
"fmt"
"strings"
"github.com/openclaw/crawlkit/tui"
"github.com/openclaw/discrawl/internal/store"
)
// runTUI implements the "tui" subcommand: it parses the flags, resolves the
// guild scope, loads matching messages from the store, and hands them to
// tui.Browse. It accepts flags only; positional arguments are a usage error.
func (r *runtime) runTUI(args []string) error {
fs := flag.NewFlagSet("tui", flag.ContinueOnError)
fs.SetOutput(r.stderr)
// Usage prints the flag defaults followed by the TUI controls help.
fs.Usage = func() {
_, _ = fmt.Fprintln(fs.Output(), "Usage of tui:")
fs.PrintDefaults()
_, _ = fmt.Fprintln(fs.Output())
_, _ = fmt.Fprintln(fs.Output(), tui.ControlsHelp())
}
// When help was requested, flag output goes to stdout instead of stderr.
if hasHelpArg(args) {
fs.SetOutput(r.stdout)
}
channel := fs.String("channel", "", "channel id")
author := fs.String("author", "", "author/user id")
limit := fs.Int("limit", 200, "row limit")
includeEmpty := fs.Bool("include-empty", false, "include empty messages")
dm := fs.Bool("dm", false, "browse direct messages")
guildsFlag := fs.String("guilds", "", "comma-separated guild ids")
guildFlag := fs.String("guild", "", "guild id")
jsonOut := fs.Bool("json", false, "write browser rows as JSON")
// A bare "help" argument is handled here, after the flags are registered
// so that PrintDefaults lists them.
if len(args) == 1 && args[0] == "help" {
fs.Usage()
return nil
}
if err := fs.Parse(args); err != nil {
// flag.ErrHelp (-h/--help) is not treated as a failure.
if errors.Is(err, flag.ErrHelp) {
return nil
}
return usageErr(err)
}
// --json can only switch JSON output on; it never clears r.json.
if *jsonOut {
r.json = true
}
if fs.NArg() != 0 {
return usageErr(errors.New("tui takes flags only"))
}
if *limit <= 0 {
return usageErr(errors.New("tui --limit must be positive"))
}
guildIDs, err := r.resolveTUIGuilds(*dm, *guildFlag, *guildsFlag)
if err != nil {
return usageErr(err)
}
// Without a store there is nothing to query: browse with no rows and no
// refresh callback.
if r.store == nil {
return tui.Browse(r.ctx, tui.BrowseOptions{
AppName: "discrawl",
Title: "discrawl archive",
EmptyMessage: "discrawl has no local messages yet",
JSON: r.json,
Layout: tui.LayoutChat,
SourceKind: r.archiveSourceKind(),
SourceLocation: r.archiveSourceLocation(),
Stdout: r.stdout,
})
}
// loadRows runs the message query with the parsed filters; it is used for
// the initial load and again by the Refresh callback.
loadRows := func() ([]tui.Row, error) {
rows, err := r.store.ListMessagesWithThreadContext(r.ctx, store.MessageListOptions{
GuildIDs: guildIDs,
Channel: *channel,
Author: *author,
Last: *limit,
IncludeEmpty: *includeEmpty,
})
if err != nil {
return nil, err
}
return discordTUIRows(rows), nil
}
archiveRows, err := loadRows()
if err != nil {
return err
}
return tui.Browse(r.ctx, tui.BrowseOptions{
AppName: "discrawl",
Title: "discrawl archive",
EmptyMessage: "discrawl has no local messages yet",
Rows: archiveRows,
// Refresh ignores the context it is given; loadRows uses r.ctx.
Refresh: func(context.Context) ([]tui.Row, error) { return loadRows() },
JSON: r.json,
Layout: tui.LayoutChat,
SourceKind: r.archiveSourceKind(),
SourceLocation: r.archiveSourceLocation(),
Stdout: r.stdout,
})
}
// resolveTUIGuilds returns the guild scope for the TUI. The result of
// directMessageGuildScope is used as-is when it fails, when dm is set, or
// when it yields at least one guild. Otherwise the configured effective
// default guild is used, and nil when there is none.
func (r *runtime) resolveTUIGuilds(dm bool, guild, guilds string) ([]string, error) {
	scope, err := directMessageGuildScope(dm, guild, guilds)
	switch {
	case err != nil, dm, len(scope) > 0:
		return scope, err
	}
	fallback := r.cfg.EffectiveDefaultGuildID()
	if fallback == "" {
		return nil, nil
	}
	return []string{fallback}, nil
}
// archiveSourceKind returns tui.SourceRemote when a non-blank share remote
// is configured and tui.SourceLocal otherwise.
func (r *runtime) archiveSourceKind() string {
	if remote := strings.TrimSpace(r.cfg.Share.Remote); remote == "" {
		return tui.SourceLocal
	}
	return tui.SourceRemote
}
// archiveSourceLocation returns the configured share remote (untrimmed) when
// it is non-blank, and the configured database path otherwise.
func (r *runtime) archiveSourceLocation() string {
	if remote := strings.TrimSpace(r.cfg.Share.Remote); remote == "" {
		return r.cfg.DBPath
	}
	return r.cfg.Share.Remote
}
// discordTUIRows converts stored message rows into tui.Row values, one per
// message and in the same order.
func discordTUIRows(rows []store.MessageRow) []tui.Row {
	out := make([]tui.Row, 0, len(rows))
	for i := range rows {
		msg := rows[i]
		text := discordDisplayContent(msg)
		detail := discordDetailContent(msg, text)

		// Title: trimmed content, else attachment text, else the message id.
		heading := strings.TrimSpace(text)
		if heading == "" {
			heading = firstNonEmpty(strings.TrimSpace(msg.AttachmentText), msg.MessageID)
		}

		// Tags: guild and channel ids, "dm" for the "@me" guild, then the
		// row source when set.
		labels := []string{msg.GuildID, msg.ChannelID}
		if msg.GuildID == "@me" {
			labels = append(labels, "dm")
		}
		if msg.Source != "" {
			labels = append(labels, msg.Source)
		}

		out = append(out, tui.Row{
			Source:    "discord",
			Kind:      "message",
			ID:        msg.MessageID,
			ParentID:  msg.ReplyToMessage,
			Scope:     discordScopeLabel(msg),
			Container: discordContainerLabel(msg),
			Author:    discordAuthorLabel(msg),
			Title:     heading,
			Text:      text,
			Detail:    detail,
			URL:       discordMessageURL(msg),
			CreatedAt: formatTime(msg.CreatedAt),
			Tags:      labels,
			Fields: map[string]string{
				"attachment_names": msg.AttachmentNames,
				"attachments":      boolString(msg.HasAttachments),
				"author_id":        msg.AuthorID,
				"channel_id":       msg.ChannelID,
				"guild_id":         msg.GuildID,
				"pinned":           boolString(msg.Pinned),
				"reply_to":         msg.ReplyToMessage,
				"source":           msg.Source,
			},
		})
	}
	return out
}
// discordDetailContent builds the detail text for a message: the trimmed
// content, then an "Attachments" section with the trimmed attachment text,
// separated by a blank line. Blank parts are left out; the result is empty
// when both are blank.
func discordDetailContent(row store.MessageRow, content string) string {
	body := strings.TrimSpace(content)
	attachments := strings.TrimSpace(row.AttachmentText)
	switch {
	case body != "" && attachments != "":
		return body + "\n\n" + "Attachments\n" + attachments
	case body != "":
		return body
	case attachments != "":
		return "Attachments\n" + attachments
	default:
		return ""
	}
}
// discordDisplayContent returns the trimmed DisplayContent when it is
// non-blank, and the untrimmed Content otherwise.
func discordDisplayContent(row store.MessageRow) string {
	display := strings.TrimSpace(row.DisplayContent)
	if display == "" {
		return row.Content
	}
	return display
}
// discordMessageURL returns the discord.com channel link for a message,
// built from the trimmed guild, channel, and message ids. It returns "" when
// any of the three is blank.
func discordMessageURL(row store.MessageRow) string {
	ids := []string{
		strings.TrimSpace(row.GuildID),
		strings.TrimSpace(row.ChannelID),
		strings.TrimSpace(row.MessageID),
	}
	for _, id := range ids {
		if id == "" {
			return ""
		}
	}
	return "https://discord.com/channels/" + strings.Join(ids, "/")
}
// discordScopeLabel returns "Direct messages" for rows in the "@me" guild,
// otherwise the guild name, falling back to the guild id.
func discordScopeLabel(row store.MessageRow) string {
	if row.GuildID != "@me" {
		return firstNonEmpty(row.GuildName, row.GuildID)
	}
	return "Direct messages"
}
// discordContainerLabel returns the channel name when set. Otherwise it
// falls back to "DM <compact channel id>" for rows in the "@me" guild and to
// the raw channel id for all other rows.
func discordContainerLabel(row store.MessageRow) string {
	fallback := row.ChannelID
	if row.GuildID == "@me" {
		fallback = "DM " + compactDiscordID(row.ChannelID)
	}
	return firstNonEmpty(row.ChannelName, fallback)
}
// discordAuthorLabel returns the trimmed author name, or "user:<compact id>"
// when only the author id is known, or "" when both are blank.
func discordAuthorLabel(row store.MessageRow) string {
	name := strings.TrimSpace(row.AuthorName)
	id := strings.TrimSpace(row.AuthorID)
	switch {
	case name != "":
		return name
	case id != "":
		return "user:" + compactDiscordID(id)
	}
	return ""
}
// compactDiscordID trims id and, when it is longer than 10 bytes, shortens
// it to its first 6 and last 4 bytes joined by "...".
func compactDiscordID(id string) string {
	trimmed := strings.TrimSpace(id)
	if n := len(trimmed); n > 10 {
		return trimmed[:6] + "..." + trimmed[n-4:]
	}
	return trimmed
}
// boolString renders a flag for the row fields map: "true" when set and the
// empty string when not.
func boolString(value bool) string {
	if !value {
		return ""
	}
	return "true"
}

View File

@ -1,3 +1,3 @@
package cli
var version = "0.4.1"
var version = "0.7.0"

View File

@ -1,22 +1,22 @@
package config
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"time"
"github.com/pelletier/go-toml/v2"
crawlconfig "github.com/openclaw/crawlkit/config"
)
const (
DefaultConfigEnv = "DISCRAWL_CONFIG"
DefaultTokenEnv = "DISCORD_BOT_TOKEN"
DefaultConfigEnv = "DISCRAWL_CONFIG"
DefaultTokenEnv = "DISCORD_BOT_TOKEN"
DefaultTokenKeyringService = "discrawl"
DefaultTokenKeyringAccount = "discord_bot_token"
)
type Config struct {
@ -28,19 +28,27 @@ type Config struct {
CacheDir string `toml:"cache_dir"`
LogDir string `toml:"log_dir"`
Discord DiscordConfig `toml:"discord"`
Desktop DesktopConfig `toml:"desktop"`
Sync SyncConfig `toml:"sync"`
Search SearchConfig `toml:"search"`
Share ShareConfig `toml:"share"`
}
type DiscordConfig struct {
TokenSource string `toml:"token_source"`
OpenClawConfig string `toml:"openclaw_config"`
Account string `toml:"account"`
TokenEnv string `toml:"token_env"`
TokenSource string `toml:"token_source"`
TokenEnv string `toml:"token_env"`
TokenKeyringService string `toml:"token_keyring_service"`
TokenKeyringAccount string `toml:"token_keyring_account"`
}
type DesktopConfig struct {
Path string `toml:"path"`
MaxFileBytes int64 `toml:"max_file_bytes"`
FullCache bool `toml:"full_cache"`
}
type SyncConfig struct {
Source string `toml:"source"`
Concurrency int `toml:"concurrency"`
RepairEvery string `toml:"repair_every"`
FullHistory bool `toml:"full_history"`
@ -77,49 +85,42 @@ type TokenResolution struct {
Path string
}
type OpenClawDiscord struct {
Token string
GuildIDs []string
Path string
}
type openClawConfig struct {
Channels struct {
Discord openClawDiscord `json:"discord"`
} `json:"channels"`
}
type openClawDiscord struct {
Token string `json:"token"`
Accounts map[string]openClawDiscordAcct `json:"accounts"`
Guilds map[string]json.RawMessage `json:"guilds"`
}
type openClawDiscordAcct struct {
Token string `json:"token"`
Guilds map[string]json.RawMessage `json:"guilds"`
}
var appConfig = crawlconfig.App{Name: "discrawl", ConfigEnv: DefaultConfigEnv, BaseDir: "~/.discrawl", LegacyBaseDir: "~/.discrawl"}
func Default() Config {
home, _ := os.UserHomeDir()
base := filepath.Join(home, ".discrawl")
paths, err := appConfig.DefaultPaths()
if err != nil {
base := filepath.Join(home, ".discrawl")
paths = crawlconfig.Paths{
DBPath: filepath.Join(base, "discrawl.db"),
CacheDir: filepath.Join(base, "cache"),
LogDir: filepath.Join(base, "logs"),
ShareDir: filepath.Join(base, "share"),
}
}
return Config{
Version: 1,
DBPath: filepath.Join(base, "discrawl.db"),
CacheDir: filepath.Join(base, "cache"),
LogDir: filepath.Join(base, "logs"),
DBPath: paths.DBPath,
CacheDir: paths.CacheDir,
LogDir: paths.LogDir,
DefaultGuildID: "",
Discord: DiscordConfig{
TokenSource: "openclaw",
OpenClawConfig: filepath.Join(home, ".openclaw", "openclaw.json"),
Account: "default",
TokenEnv: DefaultTokenEnv,
TokenSource: "env",
TokenEnv: DefaultTokenEnv,
TokenKeyringService: DefaultTokenKeyringService,
TokenKeyringAccount: DefaultTokenKeyringAccount,
},
Desktop: DesktopConfig{
Path: defaultDiscordDesktopPath(home),
MaxFileBytes: 64 << 20,
},
Sync: SyncConfig{
Source: "both",
Concurrency: defaultSyncConcurrency(),
RepairEvery: "6h",
FullHistory: true,
AttachmentText: boolPtr(true),
AttachmentText: new(true),
},
Search: SearchConfig{
DefaultMode: "fts",
@ -134,7 +135,7 @@ func Default() Config {
},
},
Share: ShareConfig{
RepoPath: filepath.Join(base, "share"),
RepoPath: paths.ShareDir,
Branch: "main",
AutoUpdate: true,
StaleAfter: "15m",
@ -155,14 +156,12 @@ func defaultSyncConcurrency() int {
}
func ResolvePath(flagPath string) string {
if strings.TrimSpace(flagPath) != "" {
return flagPath
path, err := appConfig.ResolveConfigPath(flagPath)
if err != nil {
home, _ := os.UserHomeDir()
return filepath.Join(home, ".discrawl", "config.toml")
}
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
return envPath
}
home, _ := os.UserHomeDir()
return filepath.Join(home, ".discrawl", "config.toml")
return path
}
func Load(path string) (Config, error) {
@ -171,13 +170,9 @@ func Load(path string) (Config, error) {
if err != nil {
return Config{}, err
}
data, err := os.ReadFile(expanded)
if err != nil {
if err := crawlconfig.LoadTOML(expanded, &cfg); err != nil {
return Config{}, err
}
if err := toml.Unmarshal(data, &cfg); err != nil {
return Config{}, fmt.Errorf("parse config: %w", err)
}
if err := cfg.Normalize(); err != nil {
return Config{}, err
}
@ -192,14 +187,7 @@ func Write(path string, cfg Config) error {
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(expanded), 0o755); err != nil {
return fmt.Errorf("mkdir config dir: %w", err)
}
data, err := toml.Marshal(cfg)
if err != nil {
return fmt.Errorf("marshal config: %w", err)
}
return os.WriteFile(expanded, data, 0o600)
return crawlconfig.WriteTOML(expanded, cfg, 0o600)
}
func (c *Config) Normalize() error {
@ -221,26 +209,40 @@ func (c *Config) Normalize() error {
c.LogDir = def.LogDir
}
}
c.Discord.TokenSource = strings.ToLower(strings.TrimSpace(c.Discord.TokenSource))
c.Discord.TokenEnv = strings.TrimSpace(c.Discord.TokenEnv)
c.Discord.TokenKeyringService = strings.TrimSpace(c.Discord.TokenKeyringService)
c.Discord.TokenKeyringAccount = strings.TrimSpace(c.Discord.TokenKeyringAccount)
if c.Discord.TokenSource == "" {
c.Discord.TokenSource = "openclaw"
}
if c.Discord.OpenClawConfig == "" {
c.Discord.OpenClawConfig = Default().Discord.OpenClawConfig
}
if c.Discord.Account == "" {
c.Discord.Account = "default"
c.Discord.TokenSource = "env"
}
if c.Discord.TokenEnv == "" {
c.Discord.TokenEnv = DefaultTokenEnv
}
if c.Discord.TokenKeyringService == "" {
c.Discord.TokenKeyringService = DefaultTokenKeyringService
}
if c.Discord.TokenKeyringAccount == "" {
c.Discord.TokenKeyringAccount = DefaultTokenKeyringAccount
}
if c.Desktop.Path == "" {
c.Desktop.Path = defaultDiscordDesktopPath(homeDir())
}
if c.Desktop.MaxFileBytes <= 0 {
c.Desktop.MaxFileBytes = 64 << 20
}
if c.Sync.Concurrency <= 0 {
c.Sync.Concurrency = defaultSyncConcurrency()
}
c.Sync.Source = strings.ToLower(strings.TrimSpace(c.Sync.Source))
if c.Sync.Source == "" {
c.Sync.Source = "both"
}
if c.Sync.RepairEvery == "" {
c.Sync.RepairEvery = "6h"
}
if c.Sync.AttachmentText == nil {
c.Sync.AttachmentText = boolPtr(true)
c.Sync.AttachmentText = new(true)
}
if c.Search.DefaultMode == "" {
c.Search.DefaultMode = "fts"
@ -294,6 +296,28 @@ func (c *Config) Normalize() error {
return nil
}
// defaultDiscordDesktopPath returns the default Discord desktop data
// directory for the current OS, relative to home:
//   - darwin:  <home>/Library/Application Support/discord
//   - windows: %APPDATA%/discord, else <home>/AppData/Roaming/discord
//   - other:   $XDG_CONFIG_HOME/discord, else <home>/.config/discord
//
// Environment values are trimmed; a blank value counts as unset.
func defaultDiscordDesktopPath(home string) string {
	if runtime.GOOS == "darwin" {
		return filepath.Join(home, "Library", "Application Support", "discord")
	}
	envName, fallback := "XDG_CONFIG_HOME", []string{".config"}
	if runtime.GOOS == "windows" {
		envName, fallback = "APPDATA", []string{"AppData", "Roaming"}
	}
	if base := strings.TrimSpace(os.Getenv(envName)); base != "" {
		return filepath.Join(base, "discord")
	}
	parts := append([]string{home}, fallback...)
	parts = append(parts, "discord")
	return filepath.Join(parts...)
}
// homeDir returns whatever os.UserHomeDir reports, discarding its error.
func homeDir() string {
	dir, _ := os.UserHomeDir()
	return dir
}
func (c Config) EffectiveDefaultGuildID() string {
if c.DefaultGuildID != "" {
return c.DefaultGuildID
@ -317,153 +341,18 @@ func (c Config) ShareEnabled() bool {
}
func EnsureRuntimeDirs(cfg Config) error {
paths := []string{cfg.CacheDir, cfg.LogDir, filepath.Dir(cfg.DBPath)}
for _, path := range paths {
expanded, err := ExpandPath(path)
if err != nil {
return err
}
if err := os.MkdirAll(expanded, 0o755); err != nil {
return fmt.Errorf("mkdir %s: %w", expanded, err)
}
}
return nil
return crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
DBPath: cfg.DBPath,
CacheDir: cfg.CacheDir,
LogDir: cfg.LogDir,
})
}
func ExpandPath(path string) (string, error) {
if strings.TrimSpace(path) == "" {
return "", errors.New("empty path")
}
if strings.HasPrefix(path, "~/") || path == "~" {
home, err := os.UserHomeDir()
if err != nil {
return "", fmt.Errorf("home dir: %w", err)
}
if path == "~" {
path = home
} else {
path = filepath.Join(home, strings.TrimPrefix(path, "~/"))
}
}
return filepath.Clean(os.ExpandEnv(path)), nil
}
func ResolveDiscordToken(cfg Config) (TokenResolution, error) {
if err := cfg.Normalize(); err != nil {
return TokenResolution{}, err
}
if cfg.Discord.TokenSource == "none" {
return TokenResolution{}, errors.New("discord token disabled by config")
}
if cfg.Discord.TokenSource != "env" {
openClaw, err := LoadOpenClawDiscord(cfg.Discord.OpenClawConfig, cfg.Discord.Account)
if err == nil && openClaw.Token != "" {
return TokenResolution{Token: openClaw.Token, Source: "openclaw", Path: openClaw.Path}, nil
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return TokenResolution{}, err
}
}
if envToken := NormalizeBotToken(os.Getenv(cfg.Discord.TokenEnv)); envToken != "" {
return TokenResolution{Token: envToken, Source: "env", Path: cfg.Discord.TokenEnv}, nil
}
return TokenResolution{}, errors.New("discord token not found in env or openclaw config")
}
func LoadOpenClawDiscord(path, account string) (OpenClawDiscord, error) {
paths, err := openClawCandidates(path)
if err != nil {
return OpenClawDiscord{}, err
}
for _, candidate := range paths {
info, err := loadOpenClawDiscordFile(candidate, account)
if err == nil && info.Token != "" {
return info, nil
}
if err != nil && !errors.Is(err, os.ErrNotExist) {
return OpenClawDiscord{}, err
}
}
return OpenClawDiscord{}, os.ErrNotExist
}
func loadOpenClawDiscordFile(path, account string) (OpenClawDiscord, error) {
expanded, err := ExpandPath(path)
if err != nil {
return OpenClawDiscord{}, err
}
data, err := os.ReadFile(expanded)
if err != nil {
return OpenClawDiscord{}, err
}
var payload openClawConfig
if err := json.Unmarshal(data, &payload); err != nil {
return OpenClawDiscord{}, fmt.Errorf("parse openclaw config: %w", err)
}
discord := payload.Channels.Discord
token := expandOpenClawToken(discord.Token)
guildIDs := mapKeys(discord.Guilds)
if token == "" {
acct := discord.Accounts[normalizeAccount(account)]
if acct.Token == "" && account != normalizeAccount(account) {
acct = discord.Accounts[account]
}
token = expandOpenClawToken(acct.Token)
if len(guildIDs) == 0 {
guildIDs = mapKeys(acct.Guilds)
}
}
return OpenClawDiscord{
Token: token,
GuildIDs: guildIDs,
Path: expanded,
}, nil
}
func openClawCandidates(path string) ([]string, error) {
expanded, err := ExpandPath(path)
if err != nil {
return nil, err
}
candidates := []string{expanded}
matches, err := filepath.Glob(expanded + ".bak*")
if err != nil {
return nil, err
}
sort.Strings(matches)
candidates = append(candidates, matches...)
return uniqueStrings(candidates), nil
}
func NormalizeBotToken(raw string) string {
raw = strings.TrimSpace(raw)
raw = strings.TrimPrefix(raw, "Bot ")
return strings.TrimSpace(raw)
}
func expandOpenClawToken(raw string) string {
return NormalizeBotToken(os.ExpandEnv(raw))
}
func normalizeAccount(account string) string {
account = strings.TrimSpace(strings.ToLower(account))
if account == "" {
return "default"
}
return account
}
func boolPtr(value bool) *bool {
return &value
}
func mapKeys[V any](m map[string]V) []string {
keys := make([]string, 0, len(m))
for key := range m {
keys = append(keys, key)
}
sort.Strings(keys)
return keys
return filepath.Clean(os.ExpandEnv(crawlconfig.ExpandHome(path))), nil
}
func uniqueStrings(in []string) []string {

View File

@ -7,6 +7,7 @@ import (
"testing"
"github.com/stretchr/testify/require"
"github.com/zalando/go-keyring"
)
func TestNormalizeFillsDefaults(t *testing.T) {
@ -15,9 +16,10 @@ func TestNormalizeFillsDefaults(t *testing.T) {
cfg := Config{}
require.NoError(t, cfg.Normalize())
require.Equal(t, 1, cfg.Version)
require.Equal(t, "openclaw", cfg.Discord.TokenSource)
require.Equal(t, "default", cfg.Discord.Account)
require.Equal(t, "env", cfg.Discord.TokenSource)
require.Equal(t, DefaultTokenEnv, cfg.Discord.TokenEnv)
require.Equal(t, DefaultTokenKeyringService, cfg.Discord.TokenKeyringService)
require.Equal(t, DefaultTokenKeyringAccount, cfg.Discord.TokenKeyringAccount)
require.Equal(t, defaultSyncConcurrency(), cfg.Sync.Concurrency)
require.GreaterOrEqual(t, cfg.Sync.Concurrency, 8)
require.LessOrEqual(t, cfg.Sync.Concurrency, 32)
@ -53,32 +55,8 @@ func TestDefaultSyncConcurrencyBounds(t *testing.T) {
require.Equal(t, 32, defaultSyncConcurrency())
}
func TestResolveDiscordTokenPrefersOpenClaw(t *testing.T) {
dir := t.TempDir()
openClawPath := filepath.Join(dir, "openclaw.json")
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
"channels": {
"discord": {
"token": "Bot config-token",
"guilds": { "g1": {}, "g2": {} }
}
}
}`), 0o600))
t.Setenv(DefaultTokenEnv, "env-token")
func TestResolveDiscordTokenFromEnv(t *testing.T) {
cfg := Default()
cfg.Discord.OpenClawConfig = openClawPath
token, err := ResolveDiscordToken(cfg)
require.NoError(t, err)
require.Equal(t, "config-token", token.Token)
require.Equal(t, "openclaw", token.Source)
}
func TestResolveDiscordTokenFallsBackToEnv(t *testing.T) {
cfg := Default()
cfg.Discord.TokenSource = "env"
cfg.Discord.OpenClawConfig = filepath.Join(t.TempDir(), "missing.json")
t.Setenv(DefaultTokenEnv, "Bot env-token")
token, err := ResolveDiscordToken(cfg)
@ -87,6 +65,64 @@ func TestResolveDiscordTokenFallsBackToEnv(t *testing.T) {
require.Equal(t, "env", token.Source)
}
// TestResolveDiscordTokenFallsBackToKeyring checks that, with the default
// config and an empty token environment variable, ResolveDiscordToken returns
// the value from the stubbed keyring (queried with the default service and
// account), without the "Bot " prefix and reported as source "keyring".
func TestResolveDiscordTokenFallsBackToKeyring(t *testing.T) {
cfg := Default()
t.Setenv(DefaultTokenEnv, "")
stubDiscordTokenKeyring(t, func(service, account string) (string, error) {
require.Equal(t, DefaultTokenKeyringService, service)
require.Equal(t, DefaultTokenKeyringAccount, account)
return "Bot keyring-token", nil
})
token, err := ResolveDiscordToken(cfg)
require.NoError(t, err)
require.Equal(t, "keyring-token", token.Token)
require.Equal(t, "keyring", token.Source)
require.Equal(t, "discrawl/discord_bot_token", token.Path)
}
// TestResolveDiscordTokenFromKeyringSource checks that token_source "keyring"
// reads from the keyring even when the environment variable is set, and that
// the configured service and account names are trimmed before the lookup.
func TestResolveDiscordTokenFromKeyringSource(t *testing.T) {
cfg := Default()
cfg.Discord.TokenSource = "keyring"
cfg.Discord.TokenKeyringService = " custom-service "
cfg.Discord.TokenKeyringAccount = " custom-account "
t.Setenv(DefaultTokenEnv, "ignored-env-token")
stubDiscordTokenKeyring(t, func(service, account string) (string, error) {
require.Equal(t, "custom-service", service)
require.Equal(t, "custom-account", account)
return "custom-keyring-token", nil
})
token, err := ResolveDiscordToken(cfg)
require.NoError(t, err)
require.Equal(t, "custom-keyring-token", token.Token)
require.Equal(t, "keyring", token.Source)
require.Equal(t, "custom-service/custom-account", token.Path)
}
// TestResolveDiscordTokenFromCustomEnv checks that a custom TokenEnv name is
// honored: the token is read from that variable and the variable name is
// reported as the resolution path.
func TestResolveDiscordTokenFromCustomEnv(t *testing.T) {
cfg := Default()
cfg.Discord.TokenEnv = "DISCRAWL_TEST_DISCORD_TOKEN"
t.Setenv("DISCRAWL_TEST_DISCORD_TOKEN", "custom-env-token")
token, err := ResolveDiscordToken(cfg)
require.NoError(t, err)
require.Equal(t, "custom-env-token", token.Token)
require.Equal(t, "DISCRAWL_TEST_DISCORD_TOKEN", token.Path)
}
// TestResolveDiscordTokenRequiresEnvValue checks the failure path when the
// environment variable is empty and the stubbed keyring reports
// keyring.ErrNotFound: the error names both lookup locations and still wraps
// the keyring error so errors.Is matches it.
func TestResolveDiscordTokenRequiresEnvValue(t *testing.T) {
cfg := Default()
t.Setenv(DefaultTokenEnv, "")
stubDiscordTokenKeyring(t, func(_, _ string) (string, error) {
return "", keyring.ErrNotFound
})
_, err := ResolveDiscordToken(cfg)
require.ErrorContains(t, err, `discord token not found in environment variable "DISCORD_BOT_TOKEN" or keyring item "discrawl"/"discord_bot_token"`)
require.ErrorIs(t, err, keyring.ErrNotFound)
}
func TestResolveDiscordTokenDisabled(t *testing.T) {
cfg := Default()
cfg.Discord.TokenSource = "none"
@ -96,51 +132,12 @@ func TestResolveDiscordTokenDisabled(t *testing.T) {
require.ErrorContains(t, err, "discord token disabled")
}
func TestLoadOpenClawDiscordFromAccount(t *testing.T) {
t.Parallel()
func TestResolveDiscordTokenRejectsUnsupportedSource(t *testing.T) {
cfg := Default()
cfg.Discord.TokenSource = "legacy"
dir := t.TempDir()
openClawPath := filepath.Join(dir, "openclaw.json")
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
"channels": {
"discord": {
"accounts": {
"default": {
"token": "acct-token",
"guilds": { "g3": {} }
}
}
}
}
}`), 0o600))
info, err := LoadOpenClawDiscord(openClawPath, "default")
require.NoError(t, err)
require.Equal(t, "acct-token", info.Token)
require.Equal(t, []string{"g3"}, info.GuildIDs)
}
func TestLoadOpenClawDiscordExpandsEnvToken(t *testing.T) {
dir := t.TempDir()
openClawPath := filepath.Join(dir, "openclaw.json")
t.Setenv("DISCRAWL_TEST_TOKEN", "Bot env-expanded-token")
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
"channels": {
"discord": {
"accounts": {
"default": {
"token": "${DISCRAWL_TEST_TOKEN}",
"guilds": { "g3": {} }
}
}
}
}
}`), 0o600))
info, err := LoadOpenClawDiscord(openClawPath, "default")
require.NoError(t, err)
require.Equal(t, "env-expanded-token", info.Token)
require.Equal(t, []string{"g3"}, info.GuildIDs)
_, err := ResolveDiscordToken(cfg)
require.ErrorContains(t, err, `unsupported discord token_source "legacy"`)
}
func TestWriteAndLoadRoundTrip(t *testing.T) {
@ -249,7 +246,7 @@ func TestAttachmentTextExplicitFalseSurvivesNormalize(t *testing.T) {
t.Parallel()
cfg := Default()
cfg.Sync.AttachmentText = boolPtr(false)
cfg.Sync.AttachmentText = new(false)
require.NoError(t, cfg.Normalize())
require.False(t, cfg.AttachmentTextEnabled())
}
@ -262,7 +259,7 @@ func TestExpandPath(t *testing.T) {
require.Contains(t, path, "discrawl-test")
}
func TestResolvePathAndLoadOpenClawFallbacks(t *testing.T) {
func TestResolvePath(t *testing.T) {
dir := t.TempDir()
envPath := filepath.Join(dir, "env.toml")
t.Setenv(DefaultConfigEnv, envPath)
@ -272,43 +269,6 @@ func TestResolvePathAndLoadOpenClawFallbacks(t *testing.T) {
require.Contains(t, ResolvePath(""), filepath.Join(".discrawl", "config.toml"))
_, err := ExpandPath("")
require.ErrorContains(t, err, "empty path")
openClawPath := filepath.Join(dir, "openclaw.json")
require.NoError(t, os.WriteFile(openClawPath, []byte(`{}`), 0o600))
require.NoError(t, os.WriteFile(openClawPath+".bak", []byte(`{
"channels": {
"discord": {
"accounts": {
"Work Account": {
"token": "backup-token",
"guilds": { "g9": {} }
}
}
}
}
}`), 0o600))
info, err := LoadOpenClawDiscord(openClawPath, "Work Account")
require.NoError(t, err)
require.Equal(t, "backup-token", info.Token)
require.Equal(t, []string{"g9"}, info.GuildIDs)
_, err = LoadOpenClawDiscord(filepath.Join(dir, "missing.json"), "default")
require.ErrorIs(t, err, os.ErrNotExist)
}
func TestOpenClawCandidatesIncludesBackups(t *testing.T) {
t.Parallel()
dir := t.TempDir()
base := filepath.Join(dir, "openclaw.json")
require.NoError(t, os.WriteFile(base, []byte(`{}`), 0o600))
require.NoError(t, os.WriteFile(base+".bak", []byte(`{}`), 0o600))
require.NoError(t, os.WriteFile(base+".bak.1", []byte(`{}`), 0o600))
paths, err := openClawCandidates(base)
require.NoError(t, err)
require.Len(t, paths, 3)
}
func TestEffectiveDefaultGuildAndDirs(t *testing.T) {
@ -316,12 +276,9 @@ func TestEffectiveDefaultGuildAndDirs(t *testing.T) {
require.Equal(t, "explicit", Config{DefaultGuildID: "explicit", GuildIDs: []string{"g1"}}.EffectiveDefaultGuildID())
require.Empty(t, Config{GuildIDs: []string{"g1", "g2"}}.EffectiveDefaultGuildID())
require.Equal(t, "default", normalizeAccount(""))
require.Equal(t, "work", normalizeAccount(" Work "))
require.Equal(t, []string{"a", "b"}, uniqueStrings([]string{" a ", "", "b", "a"}))
require.Equal(t, "token", NormalizeBotToken(" token "))
require.Nil(t, uniqueStrings(nil))
require.Equal(t, []string{"a", "b"}, mapKeys(map[string]int{"b": 2, "a": 1}))
cfg := Default()
cfg.GuildIDs = []string{"g1"}
@ -343,6 +300,9 @@ func TestResolvePathUsesEnv(t *testing.T) {
func TestConfigErrorsAndBackupFallback(t *testing.T) {
dir := t.TempDir()
t.Setenv(DefaultTokenEnv, "")
stubDiscordTokenKeyring(t, func(_, _ string) (string, error) {
return "", keyring.ErrNotFound
})
_, err := ExpandPath("")
require.Error(t, err)
@ -353,15 +313,15 @@ func TestConfigErrorsAndBackupFallback(t *testing.T) {
require.Error(t, err)
cfg := Default()
cfg.Discord.OpenClawConfig = filepath.Join(dir, "missing.json")
_, err = ResolveDiscordToken(cfg)
require.Error(t, err)
base := filepath.Join(dir, "openclaw.json")
backup := base + ".bak"
require.NoError(t, os.WriteFile(base, []byte(`{}`), 0o600))
require.NoError(t, os.WriteFile(backup, []byte(`{"channels":{"discord":{"token":"backup-token"}}}`), 0o600))
info, err := LoadOpenClawDiscord(base, "default")
require.NoError(t, err)
require.Equal(t, "backup-token", info.Token)
}
// stubDiscordTokenKeyring swaps the package-level keyring lookup for get for
// the duration of the calling test and restores the previous lookup through
// t.Cleanup.
func stubDiscordTokenKeyring(t *testing.T, get func(service, account string) (string, error)) {
	t.Helper()
	previous := discordTokenKeyringGet
	// Register the restore before installing the stub so the original lookup
	// is always put back.
	t.Cleanup(func() { discordTokenKeyringGet = previous })
	discordTokenKeyringGet = get
}

View File

@ -0,0 +1,64 @@
package config
import (
"errors"
"fmt"
"os"
"strings"
"github.com/zalando/go-keyring"
)
// discordTokenKeyringGet is the lookup used to read the Discord token from the
// keyring. It defaults to keyring.Get and is a package-level variable so the
// lookup can be replaced with a stub.
var discordTokenKeyringGet = keyring.Get
// ResolveDiscordToken normalizes cfg and returns the Discord bot token
// selected by cfg.Discord.TokenSource.
//
//   - "none": always fails; the token is disabled by config.
//   - "keyring": the token is read from the configured keyring item only.
//   - "env": the configured environment variable is tried first; when it is
//     empty after normalization the keyring item is used as a fallback, and a
//     failure there is wrapped in an error naming both locations.
//
// Any other source is rejected as unsupported.
func ResolveDiscordToken(cfg Config) (TokenResolution, error) {
	if err := cfg.Normalize(); err != nil {
		return TokenResolution{}, err
	}

	source := cfg.Discord.TokenSource
	if source == "none" {
		return TokenResolution{}, errors.New("discord token disabled by config")
	}
	if source == "keyring" {
		return resolveDiscordTokenFromKeyring(cfg)
	}
	if source != "env" {
		return TokenResolution{}, fmt.Errorf("unsupported discord token_source %q", source)
	}

	envName := cfg.Discord.TokenEnv
	if fromEnv := NormalizeBotToken(os.Getenv(envName)); fromEnv != "" {
		return TokenResolution{Token: fromEnv, Source: "env", Path: envName}, nil
	}

	// The environment variable was empty: fall back to the keyring item.
	resolved, keyringErr := resolveDiscordTokenFromKeyring(cfg)
	if keyringErr != nil {
		return TokenResolution{}, fmt.Errorf(
			"discord token not found in environment variable %q or keyring item %q/%q: %w",
			envName,
			cfg.Discord.TokenKeyringService,
			cfg.Discord.TokenKeyringAccount,
			keyringErr,
		)
	}
	return resolved, nil
}
// resolveDiscordTokenFromKeyring reads the token from the keyring item named
// by cfg.Discord.TokenKeyringService and cfg.Discord.TokenKeyringAccount.
// Lookup errors are returned unchanged; an item that is empty after
// normalization is reported as an error. On success the path is
// "service/account".
func resolveDiscordTokenFromKeyring(cfg Config) (TokenResolution, error) {
	service, account := cfg.Discord.TokenKeyringService, cfg.Discord.TokenKeyringAccount
	secret, err := discordTokenKeyringGet(service, account)
	if err != nil {
		return TokenResolution{}, err
	}
	if normalized := NormalizeBotToken(secret); normalized != "" {
		return TokenResolution{
			Token:  normalized,
			Source: "keyring",
			Path:   service + "/" + account,
		}, nil
	}
	return TokenResolution{}, errors.New("keyring item is empty")
}
// NormalizeBotToken returns raw with surrounding whitespace removed and a
// single leading "Bot " prefix stripped, then trims whitespace once more so
// extra spaces after the prefix do not end up in the token.
func NormalizeBotToken(raw string) string {
	withoutPrefix := strings.TrimPrefix(strings.TrimSpace(raw), "Bot ")
	return strings.TrimSpace(withoutPrefix)
}

View File

@ -2,6 +2,7 @@ package discord
import (
"context"
"errors"
"fmt"
"runtime"
"slices"
@ -179,7 +180,7 @@ func (c *Client) ChannelMessage(ctx context.Context, channelID, messageID string
func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
if handler == nil {
return fmt.Errorf("missing event handler")
return errors.New("missing event handler")
}
tailCtx, cancel := context.WithCancel(ctx)
defer cancel()
@ -187,10 +188,8 @@ func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
errCh := make(chan error, 1)
workCh := make(chan func(context.Context) error, c.tailQueueSize)
var wg sync.WaitGroup
for i := 0; i < c.tailWorkerCount; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for range c.tailWorkerCount {
wg.Go(func() {
for {
select {
case <-tailCtx.Done():
@ -207,7 +206,7 @@ func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
}
}
}
}()
})
}
c.session.AddHandler(func(_ *discordgo.Session, evt *discordgo.MessageCreate) {
@ -299,7 +298,7 @@ func (c *Client) enqueueTailTask(
case workCh <- task:
default:
select {
case errCh <- fmt.Errorf("tail worker queue full"):
case errCh <- errors.New("tail worker queue full"):
default:
}
}

View File

@ -400,7 +400,7 @@ func TestTailFailsFastWhenWorkerQueueFills(t *testing.T) {
return
}
now := time.Now().UTC().Format(time.RFC3339)
for i := 0; i < 4; i++ {
for i := range 4 {
if err := conn.WriteJSON(map[string]any{
"op": 0,
"t": "MESSAGE_CREATE",

View File

@ -0,0 +1,198 @@
package discorddesktop
import (
"encoding/json"
"sort"
"strings"
)
// userLabel is a display name recorded for a user ID together with a rank
// describing which field it came from.
type userLabel struct {
// Name is the label text; an empty Name is treated as "no label".
Name string
// Priority ranks the label's origin as assigned by userObjectLabel:
// 3 for global_name, 2 for display_name, 1 for username.
Priority int
}
// collectUserLabel records a display label for the user described by raw in
// snap.userLabels. Objects whose "id" does not look like a snowflake, that do
// not look like user objects, or that carry no usable name are ignored. An
// existing label is replaced only when it is empty or the new one has a
// strictly higher priority.
func collectUserLabel(snap snapshot, raw map[string]any) {
	userID := stringField(raw, "id")
	if !(looksSnowflake(userID) && looksUserObject(raw)) {
		return
	}
	label, rank := userObjectLabel(raw)
	if label == "" {
		return
	}
	known, seen := snap.userLabels[userID]
	if seen && known.Name != "" && rank <= known.Priority {
		// Keep the label already stored: it is at least as good.
		return
	}
	snap.userLabels[userID] = userLabel{Name: label, Priority: rank}
}
// looksUserObject reports whether raw has at least one of the keys that mark
// it as a user object. Only key presence matters; values are not inspected.
func looksUserObject(raw map[string]any) bool {
	for field := range raw {
		switch field {
		case "username", "global_name", "display_name", "discriminator", "avatar", "bot", "public_flags":
			return true
		}
	}
	return false
}
// userObjectLabel picks the preferred display name from a user object and
// returns it with its priority: global_name (3), then display_name (2), then
// username (1). It returns "" and 0 when none of them is set.
func userObjectLabel(raw map[string]any) (string, int) {
	ranked := [...]struct {
		field    string
		priority int
	}{
		{"global_name", 3},
		{"display_name", 2},
		{"username", 1},
	}
	for _, candidate := range ranked {
		if value := stringField(raw, candidate.field); value != "" {
			return value, candidate.priority
		}
	}
	return "", 0
}
// inferDirectMessageNames improves names in snap using the collected user
// labels. It works in two passes:
//
//  1. Every message whose author has a usable label gets that label as its
//     author name, and the label is written into the message's raw and
//     payload JSON. Direct-message messages with an author are also tallied
//     per author and per channel.
//  2. Every direct-message channel that still has a fallback name is renamed
//     from the labels of the authors seen in it, leaving out the author
//     inferred to be the local user when other authors are present.
func inferDirectMessageNames(snap snapshot) {
	channelsByAuthor := map[string]map[string]struct{}{}
	authorsByChannel := map[string]map[string]int{}

	for messageID, mutation := range snap.messages {
		authorID := mutation.Record.AuthorID
		label, known := snap.userLabels[authorID]
		if known && shouldUseUserLabel(mutation.Record.AuthorName, label) {
			mutation.Record.AuthorName = label.Name
			mutation.Record.RawJSON = withRawAuthorLabel(mutation.Record.RawJSON, authorID, label)
			mutation.PayloadJSON = withRawAuthorLabel(mutation.PayloadJSON, authorID, label)
			snap.messages[messageID] = mutation
		}

		if authorID == "" || mutation.Record.GuildID != DirectMessageGuildID {
			continue
		}
		channelID := mutation.Record.ChannelID

		seenChannels, ok := channelsByAuthor[authorID]
		if !ok {
			seenChannels = map[string]struct{}{}
			channelsByAuthor[authorID] = seenChannels
		}
		seenChannels[channelID] = struct{}{}

		counts, ok := authorsByChannel[channelID]
		if !ok {
			counts = map[string]int{}
			authorsByChannel[channelID] = counts
		}
		counts[authorID]++
	}

	selfID := mostRepeatedDirectMessageAuthor(channelsByAuthor)
	for channelID, record := range snap.channels {
		if record.GuildID != DirectMessageGuildID {
			continue
		}
		if !isFallbackChannelName(record.Name, channelID) {
			continue
		}
		inferred := directMessageChannelName(authorsByChannel[channelID], snap.userLabels, selfID)
		if inferred == "" {
			continue
		}
		record.Name = inferred
		record.RawJSON = withRawChannelName(record.RawJSON, channelID, record.GuildID, inferred, record.Kind)
		snap.channels[channelID] = record
	}
}
// shouldUseUserLabel reports whether label should replace the current author
// name. An empty label or one equal to the current name never does. Otherwise
// any label fills a missing name, and only a label of priority 2 or higher
// overrides an existing one.
func shouldUseUserLabel(current string, label userLabel) bool {
	switch {
	case label.Name == "", label.Name == current:
		return false
	case current == "":
		return true
	default:
		return label.Priority >= 2
	}
}
// mostRepeatedDirectMessageAuthor returns the author ID that appears in the
// largest number of distinct direct-message channels, which the caller treats
// as the local user. An author must appear in at least two channels to
// qualify; otherwise "" is returned.
//
// Map iteration order is unspecified, so authors tied on channel count are
// resolved by choosing the lexicographically smallest ID. This keeps the
// result stable across runs for the same input.
func mostRepeatedDirectMessageAuthor(authorChannels map[string]map[string]struct{}) string {
	selfID := ""
	selfChannels := 1 // a single channel is not enough to single anyone out
	for authorID, channels := range authorChannels {
		count := len(channels)
		switch {
		case count > selfChannels:
			selfID, selfChannels = authorID, count
		case count == selfChannels && selfID != "" && authorID < selfID:
			// Tie with the current pick: prefer the smaller ID.
			selfID = authorID
		}
	}
	return selfID
}
// directMessageChannelName derives a display name for a direct-message channel
// from the authors seen in it.
//
// authorCounts maps author ID to the number of messages by that author in the
// channel, labels maps author ID to the known display label, and selfID is the
// author inferred to be the local user ("" when unknown). Authors without a
// non-empty label are ignored, and selfID is ignored whenever the channel has
// more than one author.
//
// With more than two authors the result is the remaining labels sorted and
// joined with ", ". Otherwise it is the label of the author with the most
// messages, preferring the higher-priority label on equal counts. Authors tied
// on both count and priority are resolved by the lexicographically smallest
// ID, because map iteration order would otherwise make the name vary between
// runs. It returns "" when no labelled author qualifies.
func directMessageChannelName(authorCounts map[string]int, labels map[string]userLabel, selfID string) string {
	var candidates []string
	bestID := ""
	bestCount := -1
	for authorID, count := range authorCounts {
		label, ok := labels[authorID]
		if !ok || label.Name == "" {
			continue
		}
		if authorID == selfID && len(authorCounts) > 1 {
			continue
		}
		if len(authorCounts) > 2 {
			candidates = append(candidates, label.Name)
			continue
		}
		best := labels[bestID]
		better := count > bestCount ||
			(count == bestCount && label.Priority > best.Priority) ||
			// Full tie: pick the smaller ID so the result is deterministic.
			(count == bestCount && label.Priority == best.Priority && bestID != "" && authorID < bestID)
		if better {
			bestID = authorID
			bestCount = count
		}
	}
	if len(candidates) > 0 {
		sort.Strings(candidates)
		return strings.Join(candidates, ", ")
	}
	if bestID == "" {
		return ""
	}
	return labels[bestID].Name
}
// isFallbackChannelName reports whether name is a placeholder for the channel
// with the given id: blank after trimming, or "channel-" or "dm-" followed by
// the channel's short ID.
func isFallbackChannelName(name, id string) bool {
	switch strings.TrimSpace(name) {
	case "":
		return true
	case "channel-" + shortID(id), "dm-" + shortID(id):
		return true
	}
	return false
}
// withRawChannelName returns rawJSON with the channel's id, guild_id, name and
// kind fields set to the given values and "source" set to "discord_desktop".
// Other fields already present in rawJSON are preserved.
//
// Decoding is best effort: when rawJSON is empty or is not a JSON object, the
// result is built from an empty object. If encoding fails, rawJSON is returned
// unchanged.
func withRawChannelName(rawJSON, id, guildID, name, kind string) string {
	raw := map[string]any{}
	if rawJSON != "" {
		// Deliberately ignore the error: an unusable payload just means the
		// result starts from an empty object.
		_ = json.Unmarshal([]byte(rawJSON), &raw)
		// A JSON null decodes without error but resets the map to nil, and
		// writing to a nil map panics.
		if raw == nil {
			raw = map[string]any{}
		}
	}
	raw["id"] = id
	raw["guild_id"] = guildID
	raw["name"] = name
	raw["kind"] = kind
	raw["source"] = "discord_desktop"
	body, err := json.Marshal(raw)
	if err != nil {
		return rawJSON
	}
	return string(body)
}
// withRawAuthorLabel returns rawJSON with its "author" object updated to carry
// authorID and label. Labels of priority 2 or higher are stored under
// "global_name", lower ones under "username"; other author fields are kept.
//
// rawJSON is returned unchanged when it, authorID or label.Name is empty, when
// it does not decode to a JSON object (including a JSON null), or when
// re-encoding fails.
func withRawAuthorLabel(rawJSON, authorID string, label userLabel) string {
	if rawJSON == "" || authorID == "" || label.Name == "" {
		return rawJSON
	}
	raw := map[string]any{}
	if err := json.Unmarshal([]byte(rawJSON), &raw); err != nil {
		return rawJSON
	}
	// A JSON null decodes without error but leaves raw nil; there is no object
	// to annotate and writing to the nil map would panic.
	if raw == nil {
		return rawJSON
	}
	author, _ := raw["author"].(map[string]any)
	if author == nil {
		author = map[string]any{}
	}
	author["id"] = authorID
	if label.Priority >= 2 {
		author["global_name"] = label.Name
	} else {
		author["username"] = label.Name
	}
	raw["author"] = author
	body, err := json.Marshal(raw)
	if err != nil {
		return rawJSON
	}
	return string(body)
}
// sanitizedRawAuthor builds a reduced author object from raw["author"]. The
// result holds authorID under "id" when it is non-empty, plus whichever of
// username, global_name and display_name have non-empty string values. All
// other author fields are dropped.
func sanitizedRawAuthor(raw map[string]any, authorID string) map[string]any {
	sanitized := map[string]any{}
	if authorID != "" {
		sanitized["id"] = authorID
	}
	source, _ := raw["author"].(map[string]any)
	for _, field := range []string{"username", "global_name", "display_name"} {
		text := stringField(source, field)
		if text == "" {
			continue
		}
		sanitized[field] = text
	}
	return sanitized
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,198 @@
package discorddesktop
import (
"context"
"os"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/store"
)
// TestFileFingerprintStatusHelpers covers the fingerprint helpers:
// sameFileFingerprint matches on Size and ModUnixNS even when Status differs,
// a fingerprint with no status counts as imported while a skipped one does
// not, and fileIndexScope returns wiretapFileIndexScope with or without
// FullCache.
func TestFileFingerprintStatusHelpers(t *testing.T) {
base := fileFingerprint{Size: 123, ModUnixNS: 456}
require.True(t, sameFileFingerprint(base, fileFingerprint{Size: 123, ModUnixNS: 456, Status: fileStatusSkipped}))
require.False(t, sameFileFingerprint(base, fileFingerprint{Size: 124, ModUnixNS: 456}))
require.False(t, sameFileFingerprint(base, fileFingerprint{Size: 123, ModUnixNS: 457}))
require.True(t, isImportedFingerprint(base))
require.True(t, isImportedFingerprint(importedFingerprint(base)))
require.False(t, isImportedFingerprint(skippedFingerprint(base)))
require.Equal(t, fileStatusImported, importedFingerprint(base).Status)
require.Equal(t, fileStatusSkipped, skippedFingerprint(base).Status)
require.Equal(t, wiretapFileIndexScope, fileIndexScope(Options{}))
require.Equal(t, wiretapFileIndexScope, fileIndexScope(Options{FullCache: true}))
}
// TestSnapshotCopyHelpers covers the snapshot copy helpers:
// newSnapshotWithContext carries over routes and user labels but not
// channels, mergeSnapshotContext brings routes, labels and channels from the
// second snapshot into the first, and copyChannelLookup returns a map whose
// modification does not affect the source channels.
func TestSnapshotCopyHelpers(t *testing.T) {
base := newSnapshot()
base.routes["111111111111111121"] = "999999999999999996"
base.userLabels["222222222222222232"] = userLabel{Name: "Alice"}
base.channels["111111111111111121"] = store.ChannelRecord{ID: "111111111111111121", GuildID: "999999999999999996", Name: "general"}
snap := newSnapshotWithContext(base)
require.Equal(t, base.routes, snap.routes)
require.Equal(t, base.userLabels, snap.userLabels)
require.Empty(t, snap.channels)
next := newSnapshot()
next.routes["111111111111111122"] = "999999999999999996"
next.userLabels["222222222222222233"] = userLabel{Name: "Bob"}
next.channels["111111111111111122"] = store.ChannelRecord{ID: "111111111111111122", GuildID: "999999999999999996", Name: "random"}
mergeSnapshotContext(base, next)
require.Equal(t, "999999999999999996", base.routes["111111111111111122"])
require.Equal(t, "Bob", base.userLabels["222222222222222233"].Name)
require.Equal(t, "random", base.channels["111111111111111122"].Name)
lookup := copyChannelLookup(base.channels)
lookup["111111111111111122"] = store.ChannelRecord{ID: "changed"}
require.Equal(t, "random", base.channels["111111111111111122"].Name)
}
// TestSnapshotWithoutMessageEvents checks that snapshotWithoutMessageEvents
// returns a snapshot whose messages have AppendEvent cleared while
// EnqueueEmbedding is kept, and that the input snapshot is left untouched.
func TestSnapshotWithoutMessageEvents(t *testing.T) {
snap := newSnapshot()
snap.messages["333333333333333346"] = store.MessageMutation{
Record: store.MessageRecord{ID: "333333333333333346"},
Options: store.WriteOptions{
AppendEvent: true,
EnqueueEmbedding: true,
},
}
stripped := snapshotWithoutMessageEvents(snap)
require.False(t, stripped.messages["333333333333333346"].Options.AppendEvent)
require.True(t, stripped.messages["333333333333333346"].Options.EnqueueEmbedding)
require.True(t, snap.messages["333333333333333346"].Options.AppendEvent)
}
// TestRouteFilteredCacheHelpers checks how sourceForPath classifies files:
// entries under Cache/Cache_Data and Service Worker/CacheStorage are cache
// data, while a Local Storage leveldb log is context.
func TestRouteFilteredCacheHelpers(t *testing.T) {
require.Equal(t, fileSourceCacheData, sourceForPath("/tmp/discord", "/tmp/discord/Cache/Cache_Data/entry", "Cache/Cache_Data/entry"))
require.Equal(t, fileSourceCacheData, sourceForPath("/tmp/discord", "/tmp/discord/Service Worker/CacheStorage/cache/entry", "Service Worker/CacheStorage/cache/entry"))
require.Equal(t, fileSourceContext, sourceForPath("/tmp/discord", "/tmp/discord/Local Storage/leveldb/000001.log", "Local Storage/leveldb/000001.log"))
}
// TestCacheFileHasRouteHint checks that cacheFileHasRouteHint reports true
// for a file containing a Discord channel messages URL, false for a file
// without one, and returns an error for a file that does not exist.
func TestCacheFileHasRouteHint(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.WriteFile(filepath.Join(dir, "route"), []byte("https://discord.com/api/v9/channels/111111111111111121/messages?limit=50"), 0o600))
require.NoError(t, os.WriteFile(filepath.Join(dir, "plain"), []byte("no discord route here"), 0o600))
root, err := os.OpenRoot(dir)
require.NoError(t, err)
defer func() { _ = root.Close() }()
ok, err := cacheFileHasRouteHint(root, "route")
require.NoError(t, err)
require.True(t, ok)
ok, err = cacheFileHasRouteHint(root, "plain")
require.NoError(t, err)
require.False(t, ok)
_, err = cacheFileHasRouteHint(root, "missing")
require.Error(t, err)
}
// TestImportAndStateEdgeBranches walks edge cases of Import and
// loadScanState: a nil store is rejected outside dry-run mode, DefaultPath
// follows XDG_CONFIG_HOME on Linux, importing a directory with no Discord
// data still records one checkpoint, dry runs accept a nil store (including a
// missing path and FullCache), and a stored sync state that is not valid JSON
// loads as an empty previous index while stored channels are still loaded.
func TestImportAndStateEdgeBranches(t *testing.T) {
ctx := context.Background()
_, err := Import(ctx, nil, Options{})
require.ErrorContains(t, err, "store is required")
configHome := t.TempDir()
t.Setenv("XDG_CONFIG_HOME", configHome)
if runtime.GOOS == "linux" {
require.Equal(t, filepath.Join(configHome, "discord"), DefaultPath())
}
dir := t.TempDir()
s, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
require.NoError(t, err)
defer func() { _ = s.Close() }()
stats, err := Import(ctx, s, Options{
Path: dir,
Now: func() time.Time { return time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC) },
})
require.NoError(t, err)
require.Equal(t, dir, stats.Path)
require.Equal(t, 1, stats.Checkpoints)
stats, err = Import(ctx, nil, Options{Path: filepath.Join(dir, "missing"), DryRun: true})
require.NoError(t, err)
require.True(t, stats.DryRun)
stats, err = Import(ctx, nil, Options{Path: dir, DryRun: true, FullCache: true})
require.NoError(t, err)
require.True(t, stats.FullCache)
require.NoError(t, s.SetSyncState(ctx, fileIndexScope(Options{}), "{not-json"))
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))
state, err := loadScanState(ctx, s, Options{})
require.NoError(t, err)
require.Empty(t, state.previous)
require.Equal(t, "general", state.channels["c1"].Name)
}
// TestSnapshotFinalizeAndCommitBranches exercises finalizeSnapshot and the
// commit helpers. A message whose channel is unknown is reported as
// unresolved and counted as skipped, while a message in a known channel is
// counted and has its channel and guild filled into the snapshot. Merging
// further unresolved entries raises the skipped count. commitSnapshot and
// checkpointScannedCandidates are then called with and without DryRun, and
// the candidate's fingerprint ends up marked as imported.
func TestSnapshotFinalizeAndCommitBranches(t *testing.T) {
ctx := context.Background()
s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
require.NoError(t, err)
defer func() { _ = s.Close() }()
snap := newSnapshot()
snap.messages["m-missing"] = store.MessageMutation{
Record: store.MessageRecord{ID: "m-missing", ChannelID: "c-missing", RawJSON: `{}`},
}
snap.messages["m-known"] = store.MessageMutation{
Record: store.MessageRecord{ID: "m-known", GuildID: "g1", ChannelID: "c1", ChannelName: "general", RawJSON: `{}`},
}
stats := &Stats{}
totals := newScanTotals()
unresolved := finalizeSnapshot(snap, map[string]store.ChannelRecord{
"c1": {ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`},
}, totals, stats, true)
require.Equal(t, unresolvedMessages{"m-missing": "c-missing"}, unresolved)
require.Equal(t, 1, stats.Messages)
require.Equal(t, 1, stats.SkippedMessages)
require.Equal(t, "general", snap.channels["c1"].Name)
require.Equal(t, "g1", snap.guilds["g1"].ID)
more := unresolvedMessages{"m2": "c2"}
mergeUnresolved(unresolved, more)
recordUnresolved(unresolved, totals, stats)
require.Equal(t, 2, stats.SkippedMessages)
state := scanState{current: map[string]fileFingerprint{}}
candidates := []fileCandidate{{relKey: "Cache_Data/entry", fingerprint: fileFingerprint{Size: 10, ModUnixNS: 20}}}
require.NoError(t, commitSnapshot(ctx, s, Options{DryRun: true}, state, candidates, newSnapshot(), true, stats))
require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), false, stats))
require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), true, stats))
require.True(t, isImportedFingerprint(state.current["Cache_Data/entry"]))
require.NoError(t, checkpointScannedCandidates(ctx, s, Options{DryRun: true}, state, candidates, stats))
require.NoError(t, checkpointScannedCandidates(ctx, s, Options{}, state, candidates, stats))
}
// TestRouteHintCollectionBranches checks collectCacheRouteHints: a missing
// file and a file without a route do not cause an error, a file containing a
// discord.com/channels/<guild>/<channel> URL records the channel-to-guild
// route in the snapshot, and an already canceled context yields
// context.Canceled.
func TestRouteHintCollectionBranches(t *testing.T) {
dir := t.TempDir()
require.NoError(t, os.WriteFile(filepath.Join(dir, "route"), []byte("https://discord.com/channels/123456789012/111111111111111121"), 0o600))
require.NoError(t, os.WriteFile(filepath.Join(dir, "plain"), []byte("plain"), 0o600))
root, err := os.OpenRoot(dir)
require.NoError(t, err)
defer func() { _ = root.Close() }()
snap := newSnapshot()
err = collectCacheRouteHints(context.Background(), root, []fileCandidate{
{relPath: "missing"},
{relPath: "plain"},
{relPath: "route"},
}, snap)
require.NoError(t, err)
require.Equal(t, "123456789012", snap.routes["111111111111111121"])
canceled, cancel := context.WithCancel(context.Background())
cancel()
require.ErrorIs(t, collectCacheRouteHints(canceled, root, []fileCandidate{{relPath: "route"}}, newSnapshot()), context.Canceled)
}

View File

@ -0,0 +1,387 @@
package discorddesktop
import (
"context"
"fmt"
"os"
"path/filepath"
"strconv"
"testing"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/store"
)
// TestImportFastCacheSkipsUnroutedCacheDataUnlessFullCache checks the default
// fast import against FullCache for a Cache_Data entry that contains no route
// URL. The default import skips the file without scanning it and stores no
// message; a repeat import reports the file as unchanged; FullCache scans it
// and imports the message, both on the same store and on a fresh one.
func TestImportFastCacheSkipsUnroutedCacheDataUnlessFullCache(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
require.NoError(t, os.MkdirAll(cachePath, 0o755))
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"slow-cache"}
{"id":"333333333333333346","channel_id":"111111111111111121","content":"unrouted historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`), 0o600))
fastStore, err := store.Open(ctx, filepath.Join(dir, "fast.db"))
require.NoError(t, err)
defer func() { _ = fastStore.Close() }()
stats, err := Import(ctx, fastStore, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, 1, stats.CacheFilesFastSkipped)
require.Equal(t, 0, stats.Messages)
results, err := fastStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
require.NoError(t, err)
require.Empty(t, results)
stats, err = Import(ctx, fastStore, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, 0, stats.CacheFilesFastSkipped)
require.Equal(t, 1, stats.FilesUnchanged)
stats, err = Import(ctx, fastStore, Options{Path: dir, FullCache: true})
require.NoError(t, err)
require.Equal(t, 1, stats.FilesScanned)
require.Equal(t, 1, stats.Messages)
fullStore, err := store.Open(ctx, filepath.Join(dir, "full.db"))
require.NoError(t, err)
defer func() { _ = fullStore.Close() }()
stats, err = Import(ctx, fullStore, Options{Path: dir, FullCache: true})
require.NoError(t, err)
require.Equal(t, 1, stats.FilesScanned)
require.Equal(t, 0, stats.CacheFilesFastSkipped)
require.Equal(t, 1, stats.Messages)
results, err = fullStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
require.NoError(t, err)
require.Len(t, results, 1)
require.Equal(t, "slow-cache", results[0].ChannelName)
}
// TestImportCheckpointsCacheBatches writes checkpointEveryFiles+1 routed cache
// entries, one message each, and checks that a single import scans and imports
// all of them with at least two checkpoints, and that a second import finds
// every file unchanged.
func TestImportCheckpointsCacheBatches(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
require.NoError(t, os.MkdirAll(cachePath, 0o755))
for i := range checkpointEveryFiles + 1 {
channelID := "111111111111111121"
messageID := 333333333333333346 + i
body := bytesf(`https://discord.com/channels/999999999999999996/%s
{"id":"%d","channel_id":"%s","content":"checkpoint cache %d","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`, channelID, messageID, channelID, i)
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), body, 0o600))
}
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
require.NoError(t, err)
defer func() { _ = st.Close() }()
stats, err := Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, checkpointEveryFiles+1, stats.FilesScanned)
require.Equal(t, checkpointEveryFiles+1, stats.Messages)
require.GreaterOrEqual(t, stats.Checkpoints, 2)
stats, err = Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
}
// TestImportUsesLaterCacheMetadataBeforeCheckpointingEarlierBatch places a
// message in the first cache entry and the metadata for its channel in the
// entry just past the first checkpoint batch. The import must still store the
// message with the guild and channel name from the later entry, append exactly
// one message event, and leave the event count unchanged on a repeat import.
func TestImportUsesLaterCacheMetadataBeforeCheckpointingEarlierBatch(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
require.NoError(t, os.MkdirAll(cachePath, 0o755))
channelID := "111111111111111121"
guildID := "999999999999999996"
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"333333333333333346","channel_id":"%s","content":"needs later channel metadata","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`, channelID, channelID), 0o600))
for i := 1; i < checkpointEveryFiles; i++ {
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), bytesf(
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
channelID,
), 0o600))
}
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"%s","guild_id":"%s","type":0,"name":"later-metadata"}
`, channelID, channelID, guildID), 0o600))
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
require.NoError(t, err)
defer func() { _ = st.Close() }()
stats, err := Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
require.Equal(t, 1, stats.Messages)
require.GreaterOrEqual(t, stats.Checkpoints, 2)
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "needs later channel metadata", Limit: 10})
require.NoError(t, err)
require.Len(t, results, 1)
require.Equal(t, guildID, results[0].GuildID)
require.Equal(t, "later-metadata", results[0].ChannelName)
requireMessageCount(t, ctx, st, "message_events", 1)
stats, err = Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
requireMessageCount(t, ctx, st, "message_events", 1)
}
// TestImportCheckpointsPartiallyResolvedRetryBatch puts two messages in the
// first cache entry: one in a channel whose metadata appears in a later batch,
// and one in a channel that never gets metadata. The resolvable message is
// imported with its channel name and a single event, the other is counted as
// skipped and is not searchable, and a repeat import rescans nothing and adds
// no events.
func TestImportCheckpointsPartiallyResolvedRetryBatch(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
require.NoError(t, os.MkdirAll(cachePath, 0o755))
resolvedChannelID := "111111111111111121"
unresolvedChannelID := "111111111111111122"
guildID := "999999999999999996"
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v10/channels/%s/messages?limit=50
https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"333333333333333346","channel_id":"%s","content":"partially resolved retry message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
{"id":"333333333333333347","channel_id":"%s","content":"still unresolved retry message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
`, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID), 0o600))
for i := 1; i < checkpointEveryFiles; i++ {
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), bytesf(
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
resolvedChannelID,
), 0o600))
}
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"%s","guild_id":"%s","type":0,"name":"partially-resolved"}
`, resolvedChannelID, resolvedChannelID, guildID), 0o600))
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
require.NoError(t, err)
defer func() { _ = st.Close() }()
stats, err := Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
require.Equal(t, 1, stats.Messages)
require.Equal(t, 1, stats.SkippedMessages)
require.GreaterOrEqual(t, stats.Checkpoints, 2)
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "partially resolved retry", Limit: 10})
require.NoError(t, err)
require.Len(t, results, 1)
require.Equal(t, "partially-resolved", results[0].ChannelName)
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "still unresolved retry", Limit: 10})
require.NoError(t, err)
require.Empty(t, results)
requireMessageCount(t, ctx, st, "message_events", 1)
stats, err = Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
requireMessageCount(t, ctx, st, "message_events", 1)
}
// TestImportCheckpointsUnresolvableRouteBearingCacheMisses covers a cache
// entry that carries a route URL but whose message channel never gets any
// metadata. The file is scanned and checkpointed once, the message is counted
// as skipped and not stored, and a repeat import reports the file as
// unchanged rather than scanning it again.
func TestImportCheckpointsUnresolvableRouteBearingCacheMisses(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
require.NoError(t, os.MkdirAll(cachePath, 0o755))
channelID := "111111111111111121"
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"333333333333333346","channel_id":"%s","content":"permanent unresolved cache miss","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`, channelID, channelID), 0o600))
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
require.NoError(t, err)
defer func() { _ = st.Close() }()
stats, err := Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 1, stats.FilesScanned)
require.Equal(t, 1, stats.SkippedMessages)
require.Equal(t, 1, stats.Checkpoints)
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "permanent unresolved", Limit: 10})
require.NoError(t, err)
require.Empty(t, results)
stats, err = Import(ctx, st, Options{Path: dir})
require.NoError(t, err)
require.Equal(t, 0, stats.FilesScanned)
require.Equal(t, 1, stats.FilesUnchanged)
}
// TestImportDoesNotAppendEventsForSkippedMixedBatch feeds one cache entry that
// holds two messages for two different channels. The assertions require that
// only the "mixed resolved" message becomes searchable, that the
// message_events table stays empty, and that a repeat import leaves both
// facts untouched while treating the file as unchanged.
func TestImportDoesNotAppendEventsForSkippedMixedBatch(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	guildID := "999999999999999996"
	resolvedChannelID := "111111111111111121"
	unresolvedChannelID := "111111111111111122"
	entry := bytesf(`https://discord.com/channels/%s/%s
https://discord.com/api/v9/channels/%s/messages?limit=50
{"id":"333333333333333346","channel_id":"%s","content":"mixed resolved message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
{"id":"333333333333333347","channel_id":"%s","content":"mixed unresolved message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
`, guildID, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_000"), entry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	// First pass.
	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 1, report.FilesScanned)
	require.Equal(t, 1, report.Checkpoints)
	requireMessageCount(t, ctx, db, "message_events", 0)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "mixed resolved", Limit: 10})
	require.NoError(t, err)
	require.Len(t, hits, 1)
	hits, err = db.SearchMessages(ctx, store.SearchOptions{Query: "mixed unresolved", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)

	// Second pass: the file is unchanged and no events appear.
	report, err = Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 0, report.FilesScanned)
	require.Equal(t, 1, report.FilesUnchanged)
	requireMessageCount(t, ctx, db, "message_events", 0)
}
// TestImportDoesNotDuplicateEventsWhenSwitchingFullCacheModes imports the same
// cache entry twice, toggling Options.FullCache between the two runs (in both
// directions, each against its own database). The second run must see the
// file as unchanged and message_events must still hold exactly one row.
func TestImportDoesNotDuplicateEventsWhenSwitchingFullCacheModes(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	channelID := "111111111111111121"
	guildID := "999999999999999996"
	entry := bytesf(`https://discord.com/channels/%s/%s
{"id":"%s","guild_id":"%s","type":0,"name":"mode-switch"}
{"id":"333333333333333346","channel_id":"%s","content":"mode switch event once","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`, guildID, channelID, channelID, guildID, channelID)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_000"), entry, 0o600))

	scenarios := []struct {
		name   string
		dbFile string
		first  Options
		second Options
	}{
		{
			name:   "full then default",
			dbFile: "full-first.db",
			first:  Options{Path: root, FullCache: true},
			second: Options{Path: root},
		},
		{
			name:   "default then full",
			dbFile: "default-first.db",
			first:  Options{Path: root},
			second: Options{Path: root, FullCache: true},
		},
	}
	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			db, err := store.Open(ctx, filepath.Join(root, sc.dbFile))
			require.NoError(t, err)
			defer func() { _ = db.Close() }()

			// Initial import stores the message and a single event.
			report, err := Import(ctx, db, sc.first)
			require.NoError(t, err)
			require.Equal(t, 1, report.FilesScanned)
			require.Equal(t, 1, report.Messages)
			requireMessageCount(t, ctx, db, "message_events", 1)

			// Re-import under the other mode must not rescan or add events.
			report, err = Import(ctx, db, sc.second)
			require.NoError(t, err)
			require.Equal(t, 0, report.FilesScanned)
			require.Equal(t, 1, report.FilesUnchanged)
			requireMessageCount(t, ctx, db, "message_events", 1)
		})
	}
}
// TestImportFastCachePreservesKnownChannelMetadataAcrossBatches writes a
// thread channel record (type 11) into Local Storage and a message for that
// channel into the Chromium cache, then checks that the stored channel keeps
// the thread's name, kind and raw JSON type after the import.
func TestImportFastCachePreservesKnownChannelMetadataAcrossBatches(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	storageDir := filepath.Join(root, "Local Storage", "leveldb")
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(storageDir, 0o755))
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	channelID := "111111111111111121"
	guildID := "999999999999999996"

	// Channel metadata lives in the leveldb log.
	channelRecord := bytesf(
		`{"id":"%s","guild_id":"%s","type":11,"name":"known-thread","thread_metadata":{"archived":false}}`,
		channelID,
		guildID,
	)
	require.NoError(t, os.WriteFile(filepath.Join(storageDir, "000001.log"), channelRecord, 0o600))

	// The message itself comes from a cache entry.
	cacheEntry := bytesf(`https://discord.com/channels/%s/%s
{"id":"333333333333333346","channel_id":"%s","content":"thread metadata cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`, guildID, channelID, channelID)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_0"), cacheEntry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 1, report.Messages)

	stored, err := db.Channels(ctx, guildID)
	require.NoError(t, err)
	require.Len(t, stored, 1)
	require.Equal(t, "known-thread", stored[0].Name)
	require.Equal(t, "thread_public", stored[0].Kind)

	_, rawRows, err := db.ReadOnlyQuery(ctx, "select raw_json from channels where id = '111111111111111121'")
	require.NoError(t, err)
	require.Len(t, rawRows, 1)
	require.Contains(t, rawRows[0][0], `"type":11`)
}
// TestImportFastCacheRouteFiltersServiceWorkerCacheStorage places a file with
// no route URL under Service Worker/CacheStorage and expects a default import
// to fast-skip it: zero files scanned, one fast-skipped, and nothing
// searchable afterwards.
func TestImportFastCacheRouteFiltersServiceWorkerCacheStorage(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	swCacheDir := filepath.Join(root, "Service Worker", "CacheStorage", "cache-id")
	require.NoError(t, os.MkdirAll(swCacheDir, 0o755))

	payload := []byte(`
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"service-worker-cache"}
{"id":"333333333333333346","channel_id":"111111111111111121","content":"service worker historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
`)
	require.NoError(t, os.WriteFile(filepath.Join(swCacheDir, "unrouted"), payload, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 0, report.FilesScanned)
	require.Equal(t, 1, report.CacheFilesFastSkipped)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "service worker historical", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)
}
// requireMessageCount fails the test unless "select count(*)" over the given
// table returns exactly one row with one column equal to expected. The table
// name is concatenated into the query verbatim, so callers must pass a
// trusted identifier.
func requireMessageCount(t *testing.T, ctx context.Context, st *store.Store, table string, expected int) {
	t.Helper()

	query := "select count(*) from " + table
	_, rows, err := st.ReadOnlyQuery(ctx, query)
	require.NoError(t, err)

	// The result is compared as text, so the expected count is rendered in decimal.
	require.Len(t, rows, 1)
	require.Len(t, rows[0], 1)
	want := strconv.Itoa(expected)
	require.Equal(t, want, rows[0][0])
}
// bytesf formats like fmt.Sprintf but returns the result as a byte slice,
// which is convenient for building fixture file contents.
func bytesf(format string, args ...any) []byte {
	var out []byte
	out = fmt.Appendf(out, format, args...)
	return out
}

View File

@ -0,0 +1,110 @@
package discorddesktop
import (
"context"
"os"
"github.com/openclaw/discrawl/internal/store"
)
// importRun bundles the inputs and accumulated state of a single import so
// that the scan, finalize, commit and retry steps can share them.
type importRun struct {
	// ctx is forwarded to every scan, commit and checkpoint call.
	ctx context.Context
	// st is the store that snapshots and checkpoints are written to.
	st *store.Store
	// opts are the caller's import options, passed through unchanged.
	opts Options
	// state is the scan state supplied by the caller; its channels seed channelLookup.
	state scanState
	// rootFS is the filesystem root handed to scanCandidates.
	rootFS *os.Root
	// channelLookup starts as a copy of state.channels and is shared by all scans in this run.
	channelLookup map[string]store.ChannelRecord
	// totals is passed to finalizeSnapshot and recordUnresolved.
	totals scanTotals
	// stats is the caller-owned result that the helpers update in place.
	stats *Stats
	// base is the run-wide snapshot; batch snapshots are derived from it and merged back.
	base snapshot
	// pending holds candidates whose batch finished with unresolved messages.
	pending []fileCandidate
	// pendingUnresolved accumulates the unresolved messages of all deferred batches.
	pendingUnresolved unresolvedMessages
	// pendingLookupSize is len(channelLookup) at the first deferral, or -1 if nothing was deferred.
	pendingLookupSize int
	// pendingRouteSize is len(base.routes) at the first deferral, or -1 if nothing was deferred.
	pendingRouteSize int
}
// newImportRun prepares the state for one import. The channel lookup is a
// copy of state.channels, the base snapshot and totals start fresh, and the
// two pending size markers are set to -1 to signal that nothing has been
// deferred yet.
func newImportRun(ctx context.Context, st *store.Store, opts Options, state scanState, rootFS *os.Root, stats *Stats) *importRun {
	run := new(importRun)

	// Caller-provided inputs.
	run.ctx = ctx
	run.st = st
	run.opts = opts
	run.state = state
	run.rootFS = rootFS
	run.stats = stats

	// Per-run working state.
	run.channelLookup = copyChannelLookup(state.channels)
	run.totals = newScanTotals()
	run.base = newSnapshot()
	run.pendingUnresolved = unresolvedMessages{}
	run.pendingLookupSize = -1
	run.pendingRouteSize = -1

	return run
}
// scanContext scans the given candidates directly into the run's base
// snapshot and then finalizes and commits that snapshot. A scan error is
// returned before anything is committed.
func (r *importRun) scanContext(candidates []fileCandidate) error {
	err := scanCandidates(r.ctx, r.rootFS, r.opts, candidates, r.base, r.channelLookup, r.stats)
	if err == nil {
		err = r.finalizeAndCommit(candidates, r.base, false)
	}
	return err
}
// scanCacheBatches processes candidates in windows of at most
// checkpointEveryFiles files. Each window is scanned into its own snapshot
// derived from the base snapshot, finalized and committed, and only then
// merged back into the base. The first error stops processing.
func (r *importRun) scanCacheBatches(candidates []fileCandidate) error {
	total := len(candidates)
	for offset := 0; offset < total; offset += checkpointEveryFiles {
		window := candidates[offset:min(offset+checkpointEveryFiles, total)]
		scratch := newSnapshotWithContext(r.base)

		err := scanCandidates(r.ctx, r.rootFS, r.opts, window, scratch, r.channelLookup, r.stats)
		if err == nil {
			err = r.finalizeAndCommit(window, scratch, false)
		}
		if err != nil {
			return err
		}

		mergeSnapshotContext(r.base, scratch)
	}
	return nil
}
// finalizeAndCommit finalizes snap and commits it together with candidates.
// If finalization leaves unresolved messages, the candidates are queued for a
// later retry and the commit is issued with its checkpoint flag cleared.
// Nothing is committed when there are no candidates and the snapshot has no
// changes.
func (r *importRun) finalizeAndCommit(candidates []fileCandidate, snap snapshot, recordSkipped bool) error {
	unresolved := finalizeSnapshot(snap, r.channelLookup, r.totals, r.stats, recordSkipped)

	fullyResolved := len(unresolved) == 0
	if len(unresolved) > 0 {
		r.deferCandidates(candidates, unresolved)
	}

	if len(candidates) > 0 || snapshotHasChanges(snap) {
		return commitSnapshot(r.ctx, r.st, r.opts, r.state, candidates, snap, fullyResolved, r.stats)
	}
	return nil
}
// deferCandidates queues candidates for retryPending and folds their
// unresolved messages into the run-wide set. On the first deferral it also
// records the current sizes of the channel lookup and the base routes, which
// pendingCanResolve later compares against.
func (r *importRun) deferCandidates(candidates []fileCandidate, unresolved unresolvedMessages) {
	r.pending = append(r.pending, candidates...)
	mergeUnresolved(r.pendingUnresolved, unresolved)

	// A negative marker means no baseline has been captured yet.
	if r.pendingLookupSize < 0 {
		r.pendingLookupSize = len(r.channelLookup)
		r.pendingRouteSize = len(r.base.routes)
	}
}
// retryPending handles the candidates queued by deferCandidates. With
// nothing queued it is a no-op. If pendingCanResolve reports no growth in the
// channel lookup or routes, the unresolved messages are recorded and the
// queued candidates are checkpointed without rescanning. Otherwise the
// candidates are rescanned into a fresh snapshot derived from the base,
// finalized, committed with the checkpoint flag set, and merged into the base.
func (r *importRun) retryPending() error {
	switch {
	case len(r.pending) == 0:
		return nil
	case !r.pendingCanResolve():
		recordUnresolved(r.pendingUnresolved, r.totals, r.stats)
		return checkpointScannedCandidates(r.ctx, r.st, r.opts, r.state, r.pending, r.stats)
	}

	second := newSnapshotWithContext(r.base)
	err := scanCandidates(r.ctx, r.rootFS, r.opts, r.pending, second, r.channelLookup, r.stats)
	if err != nil {
		return err
	}

	finalizeSnapshot(second, r.channelLookup, r.totals, r.stats, true)
	err = commitSnapshot(r.ctx, r.st, r.opts, r.state, r.pending, second, true, r.stats)
	if err != nil {
		return err
	}

	mergeSnapshotContext(r.base, second)
	return nil
}
// pendingCanResolve reports whether the channel lookup or the base snapshot's
// routes have grown beyond the sizes recorded at the first deferral.
func (r *importRun) pendingCanResolve() bool {
	if len(r.channelLookup) > r.pendingLookupSize {
		return true
	}
	return len(r.base.routes) > r.pendingRouteSize
}

View File

@ -0,0 +1,511 @@
package discorddesktop
import (
"bytes"
"compress/gzip"
"context"
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/store"
)
// TestDesktopPathAndImportHelpers covers DefaultPath for the current OS and a
// set of small pure helpers: channel kind mapping, embed text extraction,
// snowflake and timestamp parsing, optional time formatting, and the numeric
// field accessors.
func TestDesktopPathAndImportHelpers(t *testing.T) {
	home := t.TempDir()

	// DefaultPath depends on an OS-specific environment variable.
	var wantPath string
	switch runtime.GOOS {
	case "windows":
		t.Setenv("USERPROFILE", home)
		wantPath = filepath.Join(home, "AppData", "Roaming", "discord")
	case "darwin":
		t.Setenv("HOME", home)
		wantPath = filepath.Join(home, "Library", "Application Support", "discord")
	default:
		xdg := filepath.Join(home, "xdg")
		t.Setenv("XDG_CONFIG_HOME", xdg)
		wantPath = filepath.Join(xdg, "discord")
	}
	require.Equal(t, wantPath, DefaultPath())

	// Channel type to kind mapping.
	require.Equal(t, "dm", kindForChannelType(1, true))
	require.Equal(t, "group_dm", kindForChannelType(3, true))
	require.Equal(t, "text", kindForChannelType(0, false))
	require.Equal(t, "announcement", kindForChannelType(5, false))
	require.Equal(t, "thread_announcement", kindForChannelType(10, false))
	require.Equal(t, "thread_public", kindForChannelType(11, false))
	require.Equal(t, "thread_private", kindForChannelType(12, false))
	require.Equal(t, "forum", kindForChannelType(15, false))
	require.Equal(t, "desktop", kindForChannelType(99, false))

	// Embed titles and descriptions are returned trimmed.
	parts := embedText(map[string]any{"embeds": []any{
		map[string]any{"title": " title ", "description": "body"},
	}})
	require.Equal(t, []string{"title", "body"}, parts)

	// Time helpers.
	noon := time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
	require.Equal(t, time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC), snowflakeTime("0"))
	require.True(t, snowflakeTime("not-a-snowflake").IsZero())
	require.Equal(t, noon, parseDiscordTime("2026-04-24T12:00:00Z"))
	require.True(t, parseDiscordTime("bad").IsZero())
	require.Empty(t, formatOptionalTime(time.Time{}))
	require.NotEmpty(t, formatOptionalTime(noon))

	// intField accepts float64 and json.Number values.
	n, found := intField(map[string]any{"value": float64(3)}, "value")
	require.True(t, found)
	require.Equal(t, 3, n)
	n, found = intField(map[string]any{"value": json.Number("4")}, "value")
	require.True(t, found)
	require.Equal(t, 4, n)
	_, found = intField(map[string]any{"value": json.Number("nope")}, "value")
	require.False(t, found)
	_, found = intField(map[string]any{}, "value")
	require.False(t, found)

	// int64Field accepts several numeric representations but not strings.
	require.Equal(t, int64(3), int64Field(map[string]any{"value": float64(3)}, "value"))
	require.Equal(t, int64(4), int64Field(map[string]any{"value": int64(4)}, "value"))
	require.Equal(t, int64(5), int64Field(map[string]any{"value": 5}, "value"))
	require.Equal(t, int64(6), int64Field(map[string]any{"value": json.Number("6")}, "value"))
	require.Zero(t, int64Field(map[string]any{"value": "6"}, "value"))
}
// TestImportExtractsDirectMessageFromDesktopCache imports a leveldb log that
// mixes noise, a DM channel record and a MESSAGE_CREATE payload. It checks
// that the message is stored under the direct-message guild with the
// recipient's display name, that its mention is listed, and that the
// "token" value from the surrounding payload is absent from the stored raw JSON.
func TestImportExtractsDirectMessageFromDesktopCache(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	storageDir := filepath.Join(root, "Local Storage", "leveldb")
	require.NoError(t, os.MkdirAll(storageDir, 0o755))

	logData := []byte(`noise
{"id":"111111111111111111","type":1,"recipients":[{"id":"222222222222222222","username":"alice","global_name":"Alice"}]}
binary-ish {"t":"MESSAGE_CREATE","token":"do-not-store","d":{"id":"333333333333333333","channel_id":"111111111111111111","content":"launch checklist in a DM","timestamp":"2026-04-23T18:20:43.123Z","author":{"id":"222222222222222222","username":"alice","global_name":"Alice"},"attachments":[{"id":"444444444444444444","filename":"plan.txt","size":10}],"mentions":[{"id":"555555555555555555","username":"bob"}]}} tail
`)
	require.NoError(t, os.WriteFile(filepath.Join(storageDir, "000001.log"), logData, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	fixedNow := func() time.Time { return time.Date(2026, 4, 23, 18, 30, 0, 0, time.UTC) }
	report, err := Import(ctx, db, Options{
		Path: root,
		Now:  fixedNow,
	})
	require.NoError(t, err)
	require.Equal(t, 1, report.FilesScanned)
	require.Equal(t, 1, report.Messages)
	require.Equal(t, 1, report.Channels)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "launch", Limit: 10})
	require.NoError(t, err)
	require.Len(t, hits, 1)
	require.Equal(t, DirectMessageGuildID, hits[0].GuildID)
	require.Equal(t, "Alice", hits[0].ChannelName)
	require.Equal(t, "Alice", hits[0].AuthorName)

	mentioned, err := db.ListMentions(ctx, store.MentionListOptions{
		GuildIDs: []string{DirectMessageGuildID},
		Target:   "bob",
		Limit:    10,
	})
	require.NoError(t, err)
	require.Len(t, mentioned, 1)
	require.Equal(t, "555555555555555555", mentioned[0].TargetID)

	// The envelope's token must not leak into the persisted message JSON.
	_, rawRows, err := db.ReadOnlyQuery(ctx, "select raw_json from messages where id = '333333333333333333'")
	require.NoError(t, err)
	require.Len(t, rawRows, 1)
	require.NotContains(t, rawRows[0][0], "do-not-store")
}
// TestImportSkipsUnchangedDesktopCacheFiles runs three imports over two
// leveldb logs: the first scans both files, the second scans none, and after
// one file is rewritten (with a new modification time) the third scans only
// that file while still resolving the message against the channel learned
// earlier.
func TestImportSkipsUnchangedDesktopCacheFiles(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	storageDir := filepath.Join(root, "Local Storage", "leveldb")
	require.NoError(t, os.MkdirAll(storageDir, 0o755))

	channelLog := filepath.Join(storageDir, "000001.log")
	messageLog := filepath.Join(storageDir, "000002.log")
	require.NoError(t, os.WriteFile(channelLog, []byte(`{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"wiretap-fast"}`), 0o600))
	require.NoError(t, os.WriteFile(messageLog, []byte(`{"id":"333333333333333346","channel_id":"111111111111111121","content":"first incremental message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}`), 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	// Run 1: everything is new.
	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 2, report.FilesScanned)
	require.Equal(t, 1, report.Messages)

	// Run 2: nothing changed.
	report, err = Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 0, report.FilesScanned)
	require.Equal(t, 2, report.FilesUnchanged)
	require.Equal(t, 0, report.Messages)

	// Rewrite the message log and pin its timestamps to a fixed instant.
	require.NoError(t, os.WriteFile(messageLog, []byte(`{"id":"333333333333333347","channel_id":"111111111111111121","content":"second incremental message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222233","username":"bob"}}`), 0o600))
	touched := time.Date(2026, 4, 23, 18, 21, 0, 0, time.UTC)
	require.NoError(t, os.Chtimes(messageLog, touched, touched))

	// Run 3: only the rewritten file is scanned.
	report, err = Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 1, report.FilesScanned)
	require.Equal(t, 1, report.FilesUnchanged)
	require.Equal(t, 1, report.Messages)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "second incremental", Limit: 10})
	require.NoError(t, err)
	require.Len(t, hits, 1)
	require.Equal(t, "999999999999999996", hits[0].GuildID)
	require.Equal(t, "wiretap-fast", hits[0].ChannelName)
}
// TestImportDryRunDoesNotWrite runs an import with DryRun set and checks that
// the stats flag the dry run, report no imported messages and one skipped
// message, and that the store remains empty for a matching search.
func TestImportDryRunDoesNotWrite(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	logData := []byte(`{"id":"333333333333333333","channel_id":"111111111111111111","content":"dry run only","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222222","username":"alice"}}`)
	require.NoError(t, os.WriteFile(filepath.Join(root, "000001.log"), logData, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root, DryRun: true})
	require.NoError(t, err)
	require.True(t, report.DryRun)
	require.Equal(t, 0, report.Messages)
	require.Equal(t, 1, report.SkippedMessages)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "dry", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)
}
// TestImportMissingDesktopPathIsEmpty points the import at a directory that
// does not exist and expects a successful, empty result that still echoes the
// path and carries a finish time.
func TestImportMissingDesktopPathIsEmpty(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	absent := filepath.Join(root, "missing")

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: absent})
	require.NoError(t, err)
	require.Equal(t, absent, report.Path)
	require.Zero(t, report.FilesScanned)
	require.Zero(t, report.Messages)
	require.False(t, report.FinishedAt.IsZero())
}
// TestImportExtractsCompressedUnknownMessageArrayFromChromiumCache builds a
// cache entry consisting of an API URL, a gzip-compressed message array and
// trailing bytes. The assertions expect the file to be scanned and its single
// message (and channel) to be counted as skipped rather than imported.
func TestImportExtractsCompressedUnknownMessageArrayFromChromiumCache(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	// Compress the JSON body.
	var gz bytes.Buffer
	zw := gzip.NewWriter(&gz)
	_, err := zw.Write([]byte(`[{"id":"333333333333333334","channel_id":"111111111111111112","content":"compressed cache history","timestamp":"2026-04-23T18:20:43.123Z","author":{"id":"222222222222222223","username":"alice"}}]`))
	require.NoError(t, err)
	require.NoError(t, zw.Close())

	// URL header, NUL separator, gzip stream, then trailing metadata.
	entry := append([]byte("https://discord.com/api/v9/channels/111111111111111112/messages?limit=50\x00"), gz.Bytes()...)
	entry = append(entry, "chromium trailing metadata"...)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_0"), entry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 1, report.FilesScanned)
	require.Equal(t, 0, report.Messages)
	require.Equal(t, 0, report.DMMessages)
	require.Equal(t, 1, report.SkippedMessages)
	require.Equal(t, 1, report.SkippedChannels)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "compressed", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)
}
// TestImportClassifiesCachedAPIMessageArrayFromSelectedDMRoute combines a
// Local Storage record whose selected channel has a null guild with a
// gzip-compressed API message array for that channel. All three messages are
// expected to be imported as direct messages in a single DM channel named
// after the other participant.
func TestImportClassifiesCachedAPIMessageArrayFromSelectedDMRoute(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	storageDir := filepath.Join(root, "Local Storage", "leveldb")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))
	require.NoError(t, os.MkdirAll(storageDir, 0o755))

	// Selection state: the channel is selected with no guild.
	selection := []byte(`noise
{"_state":{"selectedGuildId":null,"selectedChannelId":"1459084628458471569","selectedChannelIds":{"null":"1459084628458471569"}}}
`)
	require.NoError(t, os.WriteFile(filepath.Join(storageDir, "000001.log"), selection, 0o600))

	body := `[
{"id":"1499513741308461240","channel_id":"1459084628458471569","content":"changed your mind later","timestamp":"2026-04-30T20:52:15.546Z","author":{"id":"1395396685148061737","username":"onur_tc","global_name":"onur"}},
{"id":"1499513741308461241","channel_id":"1459084628458471569","content":"please correct me","timestamp":"2026-04-30T20:52:16.546Z","author":{"id":"1395396685148061737","username":"onur_tc","global_name":"onur"}},
{"id":"1499562787343278080","channel_id":"1459084628458471569","content":"I know you are going through a rough time","timestamp":"2026-05-01T00:08:34.929Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}}
]`
	var gz bytes.Buffer
	zw := gzip.NewWriter(&gz)
	_, err := zw.Write([]byte(body))
	require.NoError(t, err)
	require.NoError(t, zw.Close())

	entry := append([]byte("https://discord.com/api/v9/channels/1459084628458471569/messages?limit=14\x00"), gz.Bytes()...)
	entry = append(entry, "chromium trailing metadata"...)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_0"), entry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 2, report.FilesScanned)
	require.Equal(t, 3, report.Messages)
	require.Equal(t, 3, report.DMMessages)
	require.Equal(t, 1, report.DMChannels)
	require.Equal(t, 0, report.SkippedMessages)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "changed your mind", Limit: 10})
	require.NoError(t, err)
	require.Len(t, hits, 1)
	require.Equal(t, DirectMessageGuildID, hits[0].GuildID)
	require.Equal(t, "onur", hits[0].ChannelName)
	require.Equal(t, "onur", hits[0].AuthorName)
}
// TestImportReconcilesMessagesWithLaterGuildChannelMetadata stores a message
// in one log file and its channel's guild metadata in a later one. After the
// import, the message, its mention and its attachment row must all carry the
// guild ID taken from the channel record.
func TestImportReconcilesMessagesWithLaterGuildChannelMetadata(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	storageDir := filepath.Join(root, "Local Storage", "leveldb")
	require.NoError(t, os.MkdirAll(storageDir, 0o755))

	// The message file sorts before the channel file.
	messageData := []byte(`{"id":"333333333333333335","channel_id":"111111111111111113","content":"guild cache message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222224","username":"alice"},"mentions":[{"id":"555555555555555556","username":"bob"}],"attachments":[{"id":"444444444444444445","filename":"trace.txt"}]}`)
	channelData := []byte(`{"id":"111111111111111113","guild_id":"999999999999999999","type":0,"name":"backend"}`)
	require.NoError(t, os.WriteFile(filepath.Join(storageDir, "000001.log"), messageData, 0o600))
	require.NoError(t, os.WriteFile(filepath.Join(storageDir, "000002.log"), channelData, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 1, report.Messages)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "guild cache", Limit: 10})
	require.NoError(t, err)
	require.Len(t, hits, 1)
	require.Equal(t, "999999999999999999", hits[0].GuildID)
	require.Equal(t, "backend", hits[0].ChannelName)

	mentioned, err := db.ListMentions(ctx, store.MentionListOptions{
		GuildIDs: []string{"999999999999999999"},
		Target:   "bob",
		Limit:    10,
	})
	require.NoError(t, err)
	require.Len(t, mentioned, 1)

	_, attachmentRows, err := db.ReadOnlyQuery(ctx, "select guild_id from message_attachments where message_id = '333333333333333335'")
	require.NoError(t, err)
	require.Len(t, attachmentRows, 1)
	require.Equal(t, "999999999999999999", attachmentRows[0][0])
}
// TestImportClassifiesMessagesFromCachedChannelRoutes writes two cache
// entries, each starting with a /channels/ URL: one under @me and one under a
// guild ID. The messages must be classified as DM and guild respectively, and
// the guild and its channel must be stored with the placeholder names
// asserted below.
func TestImportClassifiesMessagesFromCachedChannelRoutes(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	dmEntry := []byte(`https://discord.com/channels/@me/111111111111111114
{"id":"333333333333333336","channel_id":"111111111111111114","content":"route dm message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222225","username":"alice"}}`)
	guildEntry := []byte(`https://discord.com/channels/999999999999999998/111111111111111115
{"id":"333333333333333337","channel_id":"111111111111111115","content":"route guild message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222226","username":"bob"}}`)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "dm_0"), dmEntry, 0o600))
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "guild_0"), guildEntry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 2, report.Messages)
	require.Equal(t, 1, report.DMMessages)
	require.Equal(t, 1, report.GuildMessages)
	require.Equal(t, 0, report.SkippedMessages)

	dmHits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "route dm", Limit: 10})
	require.NoError(t, err)
	require.Len(t, dmHits, 1)
	require.Equal(t, DirectMessageGuildID, dmHits[0].GuildID)

	guildHits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "route guild", Limit: 10})
	require.NoError(t, err)
	require.Len(t, guildHits, 1)
	require.Equal(t, "999999999999999998", guildHits[0].GuildID)

	// No channel or guild metadata was cached, so placeholder names are expected.
	storedChannels, err := db.Channels(ctx, "999999999999999998")
	require.NoError(t, err)
	require.Len(t, storedChannels, 1)
	require.Equal(t, "111111111111111115", storedChannels[0].ID)
	require.Equal(t, "channel-111115", storedChannels[0].Name)

	_, guildNameRows, err := db.ReadOnlyQuery(ctx, "select name from guilds where id = '999999999999999998'")
	require.NoError(t, err)
	require.Equal(t, [][]string{{"Discord Desktop Guild 999999999999999998"}}, guildNameRows)
}
// TestImportClassifiesGzipCacheMessagesFromRendererRoutes supplies channel
// routes through a renderer log (one @me route, one guild route) and the
// messages through a gzip-encoded cache entry. Both messages must be
// imported and attributed to the DM and the guild named by those routes.
func TestImportClassifiesGzipCacheMessagesFromRendererRoutes(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	logDir := filepath.Join(root, "logs")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))
	require.NoError(t, os.MkdirAll(logDir, 0o755))

	rendererLog := []byte(`[Routing/Utils] transitionTo - Transitioning to /channels/@me/111111111111111122
[Routing/Utils] transitionTo - Transitioning to /channels/999999999999999995/111111111111111123
`)
	require.NoError(t, os.WriteFile(filepath.Join(logDir, "renderer_js.log"), rendererLog, 0o600))

	var gz bytes.Buffer
	zw := gzip.NewWriter(&gz)
	_, err := zw.Write([]byte(`[
{"id":"333333333333333348","channel_id":"111111111111111122","content":"current cache dm","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222234","username":"alice","global_name":"Alice"}},
{"id":"333333333333333349","channel_id":"111111111111111123","content":"current cache guild","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222235","username":"bob","global_name":"Bob"}}
]`))
	require.NoError(t, err)
	require.NoError(t, zw.Close())

	// Cache key, content-encoding header, then the gzip stream.
	entry := append([]byte("1/0/https://discord.com/api/v9/channels/111111111111111122/messages?limit=13\x00content-encoding\x00gzip\x00"), gz.Bytes()...)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_0"), entry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 2, report.FilesScanned)
	require.Equal(t, 2, report.Messages)
	require.Equal(t, 1, report.DMMessages)
	require.Equal(t, 1, report.GuildMessages)
	require.Equal(t, 0, report.SkippedMessages)

	dmHits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "current cache dm", Limit: 10})
	require.NoError(t, err)
	require.Len(t, dmHits, 1)
	require.Equal(t, DirectMessageGuildID, dmHits[0].GuildID)
	require.Equal(t, "Alice", dmHits[0].ChannelName)

	guildHits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "current cache guild", Limit: 10})
	require.NoError(t, err)
	require.Len(t, guildHits, 1)
	require.Equal(t, "999999999999999995", guildHits[0].GuildID)
	require.Equal(t, "channel-111123", guildHits[0].ChannelName)
}
// TestImportInfersDirectMessageNamesFromCachedUsers imports two DM
// conversations from one cache entry. Each DM channel must end up named after
// the participant other than "Peter", using the global name from a cached
// user record or from the message author, and listing by that name must
// return the counterparty's message.
func TestImportInfersDirectMessageNamesFromCachedUsers(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()
	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))

	entry := []byte(`https://discord.com/channels/@me/111111111111111119
[
{"id":"333333333333333341","channel_id":"111111111111111119","content":"self first","timestamp":"2026-04-23T18:20:43Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
{"id":"333333333333333342","channel_id":"111111111111111119","content":"self second","timestamp":"2026-04-23T18:20:44Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
{"id":"333333333333333343","channel_id":"111111111111111119","content":"counterparty","timestamp":"2026-04-23T18:20:45Z","author":{"id":"222222222222222230","username":"vincentkoc"}}
]
{"user":{"id":"222222222222222230","username":"vincentkoc","global_name":"Vincent K"}}
https://discord.com/channels/@me/111111111111111120
{"id":"333333333333333344","channel_id":"111111111111111120","content":"another dm","timestamp":"2026-04-23T18:20:46Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}}
{"id":"333333333333333345","channel_id":"111111111111111120","content":"alice reply","timestamp":"2026-04-23T18:20:47Z","author":{"id":"222222222222222231","username":"alice","global_name":"Alice"}}
`)
	require.NoError(t, os.WriteFile(filepath.Join(cacheDir, "entry_0"), entry, 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 5, report.Messages)
	require.Equal(t, 2, report.DMChannels)

	// Index the stored DM channels by ID to check their inferred names.
	dmChannels, err := db.Channels(ctx, DirectMessageGuildID)
	require.NoError(t, err)
	nameOf := make(map[string]string, len(dmChannels))
	for i := range dmChannels {
		nameOf[dmChannels[i].ID] = dmChannels[i].Name
	}
	require.Equal(t, "Vincent K", nameOf["111111111111111119"])
	require.Equal(t, "Alice", nameOf["111111111111111120"])

	latest, err := db.ListMessages(ctx, store.MessageListOptions{
		GuildIDs: []string{DirectMessageGuildID},
		Channel:  "Vincent",
		Last:     1,
	})
	require.NoError(t, err)
	require.Len(t, latest, 1)
	require.Equal(t, "Vincent K", latest[0].ChannelName)
	require.Equal(t, "Vincent K", latest[0].AuthorName)
}
// TestImportDropsPreviousUnknownWiretapRows seeds the store with a guild,
// channel and message filed under the "@unknown" guild, runs an import over
// an otherwise empty directory, and expects the seeded message to no longer
// be searchable.
func TestImportDropsPreviousUnknownWiretapRows(t *testing.T) {
	ctx := context.Background()
	root := t.TempDir()

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	// Seed rows under the "@unknown" guild.
	staleGuild := store.GuildRecord{ID: "@unknown", Name: "Unknown", RawJSON: `{}`}
	require.NoError(t, db.UpsertGuild(ctx, staleGuild))
	staleChannel := store.ChannelRecord{ID: "111111111111111116", GuildID: "@unknown", Kind: "unknown", Name: "unknown", RawJSON: `{}`}
	require.NoError(t, db.UpsertChannel(ctx, staleChannel))
	staleMessage := store.MessageRecord{
		ID:                "333333333333333338",
		GuildID:           "@unknown",
		ChannelID:         "111111111111111116",
		AuthorID:          "222222222222222227",
		AuthorName:        "alice",
		MessageType:       0,
		CreatedAt:         "2026-04-23T18:20:43Z",
		Content:           "stale unknown message",
		NormalizedContent: "stale unknown message",
		RawJSON:           `{}`,
	}
	require.NoError(t, db.UpsertMessage(ctx, staleMessage))

	report, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 0, report.Messages)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "stale unknown", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)
}
// TestImportSkipsAmbiguousCachedChannelRoutes writes a cache entry in which the
// same channel ID appears under two different guild IDs, followed by one
// message for that channel, and verifies that Import counts the message and
// channel as skipped and stores nothing searchable.
func TestImportSkipsAmbiguousCachedChannelRoutes(t *testing.T) {
	// Two channel routes with differing guild IDs, then a message in that channel.
	const cacheEntry = `https://discord.com/channels/999999999999999998/111111111111111118
https://discord.com/channels/999999999999999997/111111111111111118
{"id":"333333333333333340","channel_id":"111111111111111118","content":"ambiguous route message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222229","username":"alice"}}`

	ctx := context.Background()
	root := t.TempDir()

	cacheDir := filepath.Join(root, "Cache", "Cache_Data")
	require.NoError(t, os.MkdirAll(cacheDir, 0o755))
	entryPath := filepath.Join(cacheDir, "entry_0")
	require.NoError(t, os.WriteFile(entryPath, []byte(cacheEntry), 0o600))

	db, err := store.Open(ctx, filepath.Join(root, "discrawl.db"))
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	summary, err := Import(ctx, db, Options{Path: root})
	require.NoError(t, err)
	require.Equal(t, 0, summary.Messages)
	require.Equal(t, 1, summary.SkippedMessages)
	require.Equal(t, 1, summary.SkippedChannels)

	hits, err := db.SearchMessages(ctx, store.SearchOptions{Query: "ambiguous", Limit: 10})
	require.NoError(t, err)
	require.Empty(t, hits)
}

View File

@ -0,0 +1,165 @@
package discorddesktop
import (
"encoding/json"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/openclaw/discrawl/internal/store"
)
// TestPrimitiveValueHelpers checks the typed accessors for decoded JSON maps
// (stringField, intField, int64Field, boolField), plus lenArray and
// firstNonEmpty, against a single payload holding one value of each shape.
func TestPrimitiveValueHelpers(t *testing.T) {
	payload := map[string]any{
		"string":      "value",
		"blank":       " ",
		"int":         3,
		"int64":       int64(4),
		"float":       float64(5),
		"json_number": json.Number("6"),
		"numeric":     "7",
		"bad_numeric": "nope",
		"truthy":      true,
		"array":       []any{"one", "two"},
	}

	// stringField: plain strings and json.Number yield text; everything else is empty.
	require.Equal(t, "value", stringField(payload, "string"))
	require.Equal(t, "6", stringField(payload, "json_number"))
	for _, key := range []string{"blank", "int", "missing"} {
		require.Empty(t, stringField(payload, key), key)
	}

	// intField: keys that must convert, then keys that must be rejected.
	accepted := []struct {
		key  string
		want int
	}{
		{key: "int", want: 3},
		{key: "float", want: 5},
		{key: "json_number", want: 6},
	}
	for _, tc := range accepted {
		value, found := intField(payload, tc.key)
		require.True(t, found, tc.key)
		require.Equal(t, tc.want, value, tc.key)
	}
	for _, key := range []string{"bad_numeric", "int64", "numeric", "missing"} {
		_, found := intField(payload, key)
		require.False(t, found, key)
	}

	// int64Field: numeric kinds convert; strings fall back to zero.
	widened := []struct {
		key  string
		want int64
	}{
		{key: "int", want: 3},
		{key: "int64", want: 4},
		{key: "float", want: 5},
		{key: "json_number", want: 6},
	}
	for _, tc := range widened {
		require.Equal(t, tc.want, int64Field(payload, tc.key), tc.key)
	}
	for _, key := range []string{"numeric", "bad_numeric"} {
		require.Zero(t, int64Field(payload, key), key)
	}

	require.True(t, boolField(payload, "truthy"))
	require.False(t, boolField(payload, "missing"))

	require.Equal(t, 2, lenArray(payload["array"]))
	require.Zero(t, lenArray(payload["string"]))

	require.Equal(t, "fallback", firstNonEmpty("", " ", "fallback", "later"))
	require.Empty(t, firstNonEmpty("", " "))
}
// TestDiscordValueFormatHelpers pins the output of shortID, guildName, and
// kindForChannelType for a fixed set of inputs.
func TestDiscordValueFormatHelpers(t *testing.T) {
	checks := []struct {
		label string
		want  string
		got   string
	}{
		{label: "shortID long", want: "456789", got: shortID("123456789")},
		{label: "shortID short", want: "short", got: shortID("short")},
		{label: "guildName dm", want: "Discord Direct Messages", got: guildName(DirectMessageGuildID)},
		{label: "guildName id", want: "Discord Desktop Guild 123456", got: guildName("123456")},
		{label: "type 1 dm", want: "dm", got: kindForChannelType(1, true)},
		{label: "type 3 dm", want: "group_dm", got: kindForChannelType(3, true)},
		{label: "type 11", want: "thread_public", got: kindForChannelType(11, false)},
		{label: "type 12", want: "thread_private", got: kindForChannelType(12, false)},
		{label: "type 10", want: "thread_announcement", got: kindForChannelType(10, false)},
		{label: "type 2", want: "desktop", got: kindForChannelType(2, false)},
		{label: "type 4", want: "desktop", got: kindForChannelType(4, false)},
		{label: "type 5", want: "announcement", got: kindForChannelType(5, false)},
		{label: "type 15", want: "forum", got: kindForChannelType(15, false)},
		{label: "type 16", want: "desktop", got: kindForChannelType(16, false)},
		{label: "type 0", want: "text", got: kindForChannelType(0, false)},
	}
	for _, check := range checks {
		require.Equal(t, check.want, check.got, check.label)
	}
}
// TestDiscordMessagePayloadHelpers builds one raw message payload and checks the
// attachment, mention, embed, text-normalization, raw-JSON, recipient-label,
// time-parsing, and snowflake helpers against it.
func TestDiscordMessagePayloadHelpers(t *testing.T) {
raw := map[string]any{
"id": "333333333333333333",
"channel_id": "111111111111111111",
"guild_id": "999999999999999999",
"type": float64(0),
"timestamp": "2026-05-08T12:00:00Z",
"edited_timestamp": "2026-05-08T12:05:00Z",
"content": "hello\u200b\nworld",
"message_reference": map[string]any{"message_id": "222222222222222222"},
"author": map[string]any{
"id": "444444444444444444",
"username": "peter",
"global_name": "Peter",
"display_name": "Peter S",
"discriminator": "0",
"bot": true,
},
"attachments": []any{
map[string]any{"filename": "trace.txt", "content_type": "text/plain", "size": float64(12), "url": "https://cdn.example/trace.txt"},
// No filename: the assertions below expect the id to be used as the filename.
map[string]any{"id": "att2"},
// Not an object: only two attachments are expected back.
"ignored",
},
"mentions": []any{
map[string]any{"id": "555555555555555555", "username": "alice", "global_name": "Alice"},
// No id: the expected mention list below contains only the first entry.
map[string]any{"username": "missing"},
},
"embeds": []any{
map[string]any{"title": "Deploy", "description": "Ready"},
// Whitespace-only title: embedText is expected to contribute nothing for it.
map[string]any{"title": " "},
},
}
at := parseDiscordTime("2026-05-08T12:00:00Z")
// Arguments after raw are message, guild, channel, and author IDs, matching the
// fields asserted on the mention record further down.
attachments := parseAttachments(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444")
require.Len(t, attachments, 2)
// The first attachment has no id of its own; the expected ID is the message ID
// with ":0" appended (presumably the list index; confirm in parseAttachments).
require.Equal(t, "333333333333333333:0", attachments[0].AttachmentID)
require.Equal(t, "trace.txt", attachments[0].Filename)
require.Equal(t, "att2", attachments[1].Filename)
require.Equal(t, []string{"trace.txt", "att2"}, attachmentText(attachments))
mentions := parseMentions(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444", at)
// TargetName is expected to be the mention's global_name ("Alice"), not its username.
require.Equal(t, []store.MentionEventRecord{{
MessageID: "333333333333333333",
GuildID: "999999999999999999",
ChannelID: "111111111111111111",
AuthorID: "444444444444444444",
TargetType: "user",
TargetID: "555555555555555555",
TargetName: "Alice",
EventAt: at.Format(time.RFC3339Nano),
}}, mentions)
require.Equal(t, []string{"Deploy", "Ready"}, embedText(raw))
// Normalized text is the content followed by attachment and embed text, one per line.
require.Equal(t, "helloworld\ntrace.txt\natt2\nDeploy\nReady", normalizeText(raw["content"], attachmentText(attachments), embedText(raw)))
require.Equal(t, "hidden text", cleanText("\u200bhidden\x00 text\n"))
require.Equal(t, "222222222222222222", messageReferenceID(raw))
require.Empty(t, messageReferenceID(map[string]any{}))
// Synthetic records and raw-JSON builders.
require.Contains(t, syntheticGuild("g1", "Guild").RawJSON, "discord_desktop")
require.Equal(t, "dm", syntheticChannel("c1", DirectMessageGuildID, "Alice").Kind)
require.Equal(t, "group_dm", syntheticChannel("c2", DirectMessageGuildID, "Alice, Bob").Kind)
require.Equal(t, "channel-123456", syntheticChannel("123456123456", "g1", "").Name)
require.Contains(t, channelRawJSON(raw, "c1", "g1", "general", "text"), `"kind":"text"`)
require.Contains(t, messageRawJSON(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444"), "desktop_cache_note")
// Recipients are given as Bob then Alice; the expected label lists Alice first
// and drops the entry that has no name.
require.Equal(t, "Alice, Bob", recipientLabel([]any{
map[string]any{"username": "Bob"},
map[string]any{"global_name": "Alice"},
map[string]any{},
}))
// Time and snowflake parsing: invalid or empty input must yield the zero time.
require.True(t, parseDiscordTime("2026-05-08T12:00:00.123Z").Equal(time.Date(2026, 5, 8, 12, 0, 0, 123000000, time.UTC)))
require.True(t, parseDiscordTime("bad").IsZero())
require.True(t, parseDiscordTime("").IsZero())
require.False(t, snowflakeTime("175928847299117063").IsZero())
require.True(t, snowflakeTime("bad").IsZero())
require.Empty(t, formatOptionalTime(time.Time{}))
require.Equal(t, "2026-05-08T12:00:00Z", formatOptionalTime(at))
// looksSnowflake: 12 digits pass; 3 digits or a non-digit character fail.
require.True(t, looksSnowflake("123456789012"))
require.False(t, looksSnowflake("123"))
require.False(t, looksSnowflake("12345678901x"))
}

View File

@ -1,91 +0,0 @@
package embed
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
)
type ollamaProvider struct {
client *http.Client
baseURL string
model string
maxInputChars int
}
type (
	// ollamaEmbedRequest is the JSON body posted to the "/api/embed" endpoint.
	ollamaEmbedRequest struct {
		Model string   `json:"model"`
		Input []string `json:"input"`
	}

	// ollamaEmbedResponse is the JSON body decoded from the "/api/embed" reply;
	// Embed requires one entry in Embeddings per input.
	ollamaEmbedResponse struct {
		Model      string      `json:"model"`
		Embeddings [][]float32 `json:"embeddings"`
	}
)
// newOllamaProvider builds an Ollama-backed Provider by copying the HTTP
// client, base URL, model, and input character limit out of settings.
func newOllamaProvider(settings providerSettings) Provider {
	provider := new(ollamaProvider)
	provider.client = settings.HTTPClient
	provider.baseURL = settings.BaseURL
	provider.model = settings.Model
	provider.maxInputChars = settings.MaxInputChars
	return provider
}
// Embed posts inputs (after trimInputs applies the configured character limit)
// to the server's "/api/embed" endpoint and returns the resulting vectors.
//
// An empty inputs slice returns a batch naming only the configured model, with
// no request made. An error is returned when the request fails, when the
// number of returned vectors differs from len(inputs), or when
// inferDimensions rejects the vectors. The batch's Model is the one reported
// by the server, falling back to the configured model when the reply omits it.
func (p *ollamaProvider) Embed(ctx context.Context, inputs []string) (EmbeddingBatch, error) {
	batch := EmbeddingBatch{Model: p.model}
	if len(inputs) == 0 {
		return batch, nil
	}

	request := ollamaEmbedRequest{Model: p.model, Input: trimInputs(inputs, p.maxInputChars)}
	var reply ollamaEmbedResponse
	err := postJSON(ctx, p.client, p.baseURL+"/api/embed", "", request, &reply)
	if err != nil {
		return EmbeddingBatch{}, err
	}

	if got, want := len(reply.Embeddings), len(inputs); got != want {
		return EmbeddingBatch{}, fmt.Errorf("ollama embedding response returned %d vectors for %d inputs", got, want)
	}

	dims, err := inferDimensions(reply.Embeddings)
	if err != nil {
		return EmbeddingBatch{}, err
	}

	// Prefer the model name the server reports over the configured one.
	if reply.Model != "" {
		batch.Model = reply.Model
	}
	batch.Dimensions = dims
	batch.Vectors = reply.Embeddings
	return batch, nil
}
// postJSON marshals payload as JSON, POSTs it to endpoint, and decodes the JSON
// response body into target.
//
// The request carries JSON Content-Type and Accept headers and, when apiKey is
// non-empty, an "Authorization: Bearer <apiKey>" header. A nil client falls
// back to http.DefaultClient instead of panicking, since the provider
// constructors copy the configured client without checking it.
//
// Errors from marshalling, request construction, transport, and decoding are
// wrapped with context. A status outside 200-299 yields an *HTTPError holding
// the status code and at most the first 4096 bytes of the response body.
func postJSON(ctx context.Context, client *http.Client, endpoint, apiKey string, payload any, target any) error {
	if client == nil {
		client = http.DefaultClient
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("marshal embedding request: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
	if err != nil {
		return fmt.Errorf("build embedding request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Accept", "application/json")
	if apiKey != "" {
		req.Header.Set("Authorization", "Bearer "+apiKey)
	}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("embedding request failed: %w", err)
	}
	// A close error is not actionable once the body has been consumed.
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: a truncated or unreadable error body still reports the status.
		msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return &HTTPError{StatusCode: resp.StatusCode, Body: string(msg)}
	}
	if err := json.NewDecoder(resp.Body).Decode(target); err != nil {
		return fmt.Errorf("decode embedding response: %w", err)
	}
	return nil
}

View File

@ -1,82 +0,0 @@
package embed
import (
"context"
"fmt"
"net/http"
)
type openAICompatibleProvider struct {
client *http.Client
baseURL string
apiKey string
model string
maxInputChars int
}
type (
	// openAIEmbeddingRequest is the JSON body posted to the "/embeddings" endpoint.
	openAIEmbeddingRequest struct {
		Model string   `json:"model"`
		Input []string `json:"input"`
	}

	// openAIEmbeddingResponse is the JSON body decoded from the "/embeddings" reply.
	openAIEmbeddingResponse struct {
		Model string                `json:"model"`
		Data  []openAIEmbeddingItem `json:"data"`
	}

	// openAIEmbeddingItem is one returned vector. Index is a pointer so that a
	// missing "index" field can be told apart from an explicit index of 0.
	openAIEmbeddingItem struct {
		Index     *int      `json:"index"`
		Embedding []float32 `json:"embedding"`
	}
)
// newOpenAICompatibleProvider builds an OpenAI-compatible Provider by copying
// the HTTP client, base URL, API key, model, and input character limit out of
// settings.
func newOpenAICompatibleProvider(settings providerSettings) Provider {
	provider := new(openAICompatibleProvider)
	provider.client = settings.HTTPClient
	provider.baseURL = settings.BaseURL
	provider.apiKey = settings.APIKey
	provider.model = settings.Model
	provider.maxInputChars = settings.MaxInputChars
	return provider
}
// Embed posts inputs (after trimInputs applies the configured character limit)
// to the API's "/embeddings" endpoint and returns the vectors ordered to match
// inputs.
//
// An empty inputs slice returns a batch naming only the configured model, with
// no request made. Each returned item is placed at its reported index, or at
// its position in the response when the index is absent. An error is returned
// when the request fails, when the item count differs from len(inputs), when an
// index is out of range or repeated, or when inferDimensions rejects the
// vectors. The batch's Model is the one reported by the server, falling back to
// the configured model when the reply omits it.
func (p *openAICompatibleProvider) Embed(ctx context.Context, inputs []string) (EmbeddingBatch, error) {
	batch := EmbeddingBatch{Model: p.model}
	if len(inputs) == 0 {
		return batch, nil
	}

	request := openAIEmbeddingRequest{Model: p.model, Input: trimInputs(inputs, p.maxInputChars)}
	var reply openAIEmbeddingResponse
	err := postJSON(ctx, p.client, p.baseURL+"/embeddings", p.apiKey, request, &reply)
	if err != nil {
		return EmbeddingBatch{}, err
	}

	total := len(inputs)
	if got := len(reply.Data); got != total {
		return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response returned %d vectors for %d inputs", got, total)
	}

	ordered := make([][]float32, total)
	filled := make([]bool, total)
	for position := range reply.Data {
		entry := &reply.Data[position]
		// Fall back to response order when the server omits the index.
		slot := position
		if entry.Index != nil {
			slot = *entry.Index
		}
		switch {
		case slot < 0 || slot >= total:
			return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response index %d out of range", slot)
		case filled[slot]:
			return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response duplicated index %d", slot)
		}
		filled[slot] = true
		ordered[slot] = entry.Embedding
	}

	dims, err := inferDimensions(ordered)
	if err != nil {
		return EmbeddingBatch{}, err
	}

	// Prefer the model name the server reports over the configured one.
	if reply.Model != "" {
		batch.Model = reply.Model
	}
	batch.Dimensions = dims
	batch.Vectors = ordered
	return batch, nil
}

Some files were not shown because too many files have changed in this diff Show More