Compare commits
2 Commits
main
...
feat/discr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a473225867 | ||
|
|
afd9b5b33e |
@ -1,124 +0,0 @@
|
||||
---
|
||||
name: discrawl
|
||||
description: Use for local Discord archive search, sync freshness, DMs, channel summaries, desktop/API/git-share sources, TUI browsing, and Discrawl repo/release work.
|
||||
---
|
||||
|
||||
# Discrawl
|
||||
|
||||
Use local Discord archive data first for Discord questions. Hit Discord APIs
|
||||
only when the archive is stale, missing the requested scope, or the user asks
|
||||
for current external context.
|
||||
|
||||
## Sources
|
||||
|
||||
- DB: `~/.discrawl/discrawl.db`
|
||||
- Config: `~/.discrawl/config.toml`
|
||||
- Cache: `~/.discrawl/cache`
|
||||
- Logs: `~/.discrawl/logs`
|
||||
- Git share repo: `~/.discrawl/share`
|
||||
- Repo: `openclaw/discrawl`; use `~/GIT/_Perso/discrawl` only after verifying
|
||||
its remote targets `openclaw/discrawl`, otherwise use a fresh checkout
|
||||
- Preferred CLI: `discrawl`; fall back to `go run ./cmd/discrawl` from the repo if the installed binary is stale
|
||||
|
||||
## Freshness
|
||||
|
||||
For recent/current questions, check freshness before analysis:
|
||||
|
||||
```bash
|
||||
discrawl status --json
|
||||
```
|
||||
|
||||
For precise freshness from the default database:
|
||||
|
||||
```bash
|
||||
sqlite3 ~/.discrawl/discrawl.db \
|
||||
"select coalesce(max(updated_at),'') from sync_state where scope like 'channel:%';"
|
||||
```
|
||||
|
||||
Routine diagnostics:
|
||||
|
||||
```bash
|
||||
discrawl doctor
|
||||
```
|
||||
|
||||
Desktop-local refresh:
|
||||
|
||||
```bash
|
||||
discrawl sync --source wiretap
|
||||
```
|
||||
|
||||
Bot API latest refresh, when credentials are available:
|
||||
|
||||
```bash
|
||||
discrawl sync
|
||||
```
|
||||
|
||||
Use `--full` only for deliberate historical backfills:
|
||||
|
||||
```bash
|
||||
discrawl sync --full
|
||||
```
|
||||
|
||||
If SQLite reports busy/locked, check for stray `discrawl` processes before retrying.
|
||||
|
||||
## Query Workflow
|
||||
|
||||
1. Resolve scope: guild, channel, DM, author, keyword, date range.
|
||||
2. Check freshness for recent/current requests.
|
||||
3. Prefer CLI search/messages for slices; use read-only SQL for exact counts.
|
||||
4. Report absolute date spans, counts, channel/DM names, and known gaps.
|
||||
|
||||
Common commands:
|
||||
|
||||
```bash
|
||||
discrawl search "query"
|
||||
discrawl messages --channel '#maintainers' --days 7 --all
|
||||
discrawl dms --last 20
|
||||
discrawl tui --dm
|
||||
discrawl sql "select count(*) from messages;"
|
||||
```
|
||||
|
||||
## SQL
|
||||
|
||||
Use `discrawl sql` for exact counts, joins, and ranking queries when normal
|
||||
CLI reads are too coarse. The command is read-only by default, accepts SQL as
|
||||
args or stdin, and supports `--json` for agent parsing.
|
||||
|
||||
Useful examples:
|
||||
|
||||
```bash
|
||||
discrawl --json sql "select count(*) as messages from messages;"
|
||||
discrawl --json sql "select coalesce(nullif(c.name, ''), m.channel_id) as channel, count(*) as messages from messages m left join channels c on c.id = m.channel_id group by m.channel_id order by messages desc limit 20;"
|
||||
discrawl --json sql "select coalesce(nullif(mm.display_name, ''), nullif(mm.global_name, ''), nullif(mm.username, ''), m.author_id) as author, count(*) as messages from messages m left join members mm on mm.guild_id = m.guild_id and mm.user_id = m.author_id group by m.guild_id, m.author_id order by messages desc limit 20;"
|
||||
```
|
||||
|
||||
Never use `--unsafe --confirm` unless the user explicitly asks for a database
|
||||
mutation and the write has been reviewed.
|
||||
|
||||
When the installed CLI lacks a new feature, build or run from a verified
|
||||
`openclaw/discrawl` checkout before concluding the feature is missing.
|
||||
|
||||
## Discord Boundaries
|
||||
|
||||
Bot API sync requires configured Discord bot credentials; do not invent token
|
||||
availability. Desktop wiretap mode reads local Discord Desktop artifacts and
|
||||
must not extract credentials, use user tokens, call Discord as the user, or
|
||||
write to Discord application storage. Wiretap/Desktop cache DMs are local-only
|
||||
and must not be described as part of the published Git snapshot. Git-share
|
||||
snapshots must not include secrets or `@me` DM rows.
|
||||
|
||||
## Verification
|
||||
|
||||
For repo edits, prefer existing Go gates:
|
||||
|
||||
```bash
|
||||
GOWORK=off go test ./...
|
||||
```
|
||||
|
||||
Then run a targeted CLI smoke test for the touched surface, for example:
|
||||
|
||||
```bash
|
||||
discrawl doctor
|
||||
discrawl status --json
|
||||
discrawl search "test" --limit 5
|
||||
```
|
||||
@ -1,12 +0,0 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
|
||||
[*.{md,yml,yaml,json,toml}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
6
.gitattributes
vendored
6
.gitattributes
vendored
@ -1,6 +0,0 @@
|
||||
* text=auto
|
||||
*.go text eol=lf
|
||||
*.md text eol=lf
|
||||
*.toml text eol=lf
|
||||
*.yml text eol=lf
|
||||
*.yaml text eol=lf
|
||||
12
.github/CODEOWNERS
vendored
12
.github/CODEOWNERS
vendored
@ -1,12 +0,0 @@
|
||||
# Protect ownership and automation rules.
|
||||
/.github/CODEOWNERS @openclaw/openclaw-secops
|
||||
/.github/dependabot.yml @openclaw/openclaw-secops
|
||||
/.github/workflows/ @openclaw/openclaw-secops
|
||||
|
||||
# Release, backup, and package integrity surfaces.
|
||||
/.goreleaser.yaml @openclaw/openclaw-secops
|
||||
/go.mod @openclaw/openclaw-secops
|
||||
/go.sum @openclaw/openclaw-secops
|
||||
/scripts/*backup* @openclaw/openclaw-secops
|
||||
/scripts/*release* @openclaw/openclaw-secops
|
||||
/scripts/*publish* @openclaw/openclaw-secops
|
||||
61
.github/workflows/ci.yml
vendored
61
.github/workflows/ci.yml
vendored
@ -30,13 +30,13 @@ jobs:
|
||||
- name: Lint
|
||||
uses: golangci/golangci-lint-action@v9.2.0
|
||||
with:
|
||||
version: v2.12.1
|
||||
version: v2.11.1
|
||||
|
||||
- name: Install analyzers
|
||||
run: |
|
||||
go install honnef.co/go/tools/cmd/staticcheck@v0.7.0
|
||||
go install mvdan.cc/gofumpt@v0.9.2
|
||||
go install github.com/securego/gosec/v2/cmd/gosec@v2.26.1
|
||||
go install honnef.co/go/tools/cmd/staticcheck@2025.1.1
|
||||
go install mvdan.cc/gofumpt@v0.7.0
|
||||
go install github.com/securego/gosec/v2/cmd/gosec@v2.22.9
|
||||
|
||||
- name: Vet
|
||||
run: go vet ./...
|
||||
@ -70,10 +70,10 @@ jobs:
|
||||
cache: true
|
||||
|
||||
- name: Test with coverage
|
||||
run: go test -count=1 ./... -coverprofile=coverage.out
|
||||
run: go test ./... -coverprofile=coverage.out
|
||||
|
||||
- name: Test with race detector
|
||||
run: go test -count=1 -race ./...
|
||||
run: go test -race ./...
|
||||
|
||||
- name: Enforce coverage floor
|
||||
run: |
|
||||
@ -83,27 +83,15 @@ jobs:
|
||||
print "missing coverage total"
|
||||
exit 1
|
||||
}
|
||||
if (total + 0 < 85.0) {
|
||||
printf("coverage %.1f%% is below 85%%\n", total + 0)
|
||||
if (total + 0 < 80.0) {
|
||||
printf("coverage %.1f%% is below 80%%\n", total + 0)
|
||||
exit 1
|
||||
}
|
||||
printf("coverage %.1f%%\n", total + 0)
|
||||
}'
|
||||
|
||||
- name: Build
|
||||
run: go build -o bin/discrawl ./cmd/discrawl
|
||||
|
||||
- name: Smoke test CLI control surface
|
||||
run: |
|
||||
set -euo pipefail
|
||||
output="$(./bin/discrawl help)"
|
||||
printf '%s\n' "$output"
|
||||
printf '%s' "$output" | grep -q "metadata"
|
||||
printf '%s' "$output" | grep -q "tui"
|
||||
test -n "$(./bin/discrawl --version)"
|
||||
./bin/discrawl metadata --json | grep -q '"schema_version"'
|
||||
./bin/discrawl status --json | grep -q '"databases"'
|
||||
./bin/discrawl tui --json | grep -q '^\['
|
||||
run: go build ./cmd/discrawl
|
||||
|
||||
deps:
|
||||
runs-on: ubuntu-latest
|
||||
@ -121,39 +109,12 @@ jobs:
|
||||
- name: Verify module cache
|
||||
run: go mod verify
|
||||
|
||||
- name: Check go.mod tidy
|
||||
run: |
|
||||
go mod tidy
|
||||
git diff --exit-code -- go.mod go.sum
|
||||
|
||||
- name: Install govulncheck
|
||||
run: go install golang.org/x/vuln/cmd/govulncheck@v1.3.0
|
||||
run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4
|
||||
|
||||
- name: Run govulncheck
|
||||
run: '"$(go env GOPATH)/bin/govulncheck" ./...'
|
||||
|
||||
release-check:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v6.4.0
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
|
||||
- name: Snapshot release build
|
||||
uses: goreleaser/goreleaser-action@v7.2.1
|
||||
with:
|
||||
distribution: goreleaser
|
||||
version: "~> v2"
|
||||
args: release --snapshot --clean --skip=publish
|
||||
|
||||
secrets:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
@ -170,7 +131,7 @@ jobs:
|
||||
cache: true
|
||||
|
||||
- name: Install gitleaks
|
||||
run: go install github.com/zricethezav/gitleaks/v8@v8.30.1
|
||||
run: go install github.com/zricethezav/gitleaks/v8@v8.30.0
|
||||
|
||||
- name: Scan git history
|
||||
run: |
|
||||
|
||||
37
.github/workflows/codeql.yml
vendored
37
.github/workflows/codeql.yml
vendored
@ -1,37 +0,0 @@
|
||||
name: CodeQL
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
- cron: "29 4 * * 1"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
actions: read
|
||||
contents: read
|
||||
security-events: write
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: analyze
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Setup Go
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v4
|
||||
with:
|
||||
languages: go
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v4
|
||||
38
.github/workflows/discord-backup-report.yml
vendored
38
.github/workflows/discord-backup-report.yml
vendored
@ -26,6 +26,11 @@ jobs:
|
||||
go-version-file: go.mod
|
||||
cache: true
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@v6.0.0
|
||||
with:
|
||||
node-version: "24"
|
||||
|
||||
- name: Restore Discord DB cache
|
||||
id: restore-discord-db
|
||||
uses: actions/cache/restore@v5.0.5
|
||||
@ -46,27 +51,46 @@ jobs:
|
||||
|
||||
- name: Generate daily Discord report
|
||||
env:
|
||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||
DISCORD_BACKUP_TOKEN: ${{ secrets.DISCORD_BACKUP_TOKEN }}
|
||||
DISCRAWL_BACKUP_REPOSITORY: ${{ secrets.DISCRAWL_BACKUP_REPOSITORY }}
|
||||
GH_TOKEN: ${{ secrets.DISCORD_FIELD_NOTES_GITHUB_TOKEN || github.token }}
|
||||
CONFIG: ${{ runner.temp }}/discrawl/config.toml
|
||||
DB: ${{ github.workspace }}/.discrawl-ci/discrawl.db
|
||||
BACKUP_REPO: ${{ runner.temp }}/discord-backup
|
||||
OPENCLAW_STATE_DIR: ${{ runner.temp }}/openclaw
|
||||
DISCORD_FIELD_NOTES_GITHUB_REPO: openclaw/openclaw
|
||||
run: |
|
||||
if [ -z "${DISCRAWL_BACKUP_REPOSITORY:-}" ]; then
|
||||
echo "::notice title=Backup report skipped::Configure DISCRAWL_BACKUP_REPOSITORY as owner/repo to enable archive reports."
|
||||
exit 0
|
||||
fi
|
||||
if [ -z "${DISCORD_BACKUP_TOKEN:-}" ]; then
|
||||
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to the backup repository."
|
||||
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to openclaw/discord-backup."
|
||||
exit 1
|
||||
fi
|
||||
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/${DISCRAWL_BACKUP_REPOSITORY}.git"
|
||||
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/openclaw/discord-backup.git"
|
||||
mkdir -p "$(dirname "$CONFIG")"
|
||||
mkdir -p "$(dirname "$DB")"
|
||||
git clone "$BACKUP_REMOTE" "$BACKUP_REPO"
|
||||
printf 'db_path = "%s"\n' "$DB" > "$CONFIG"
|
||||
go run ./cmd/discrawl --config "$CONFIG" subscribe --repo "$BACKUP_REPO" "$BACKUP_REMOTE"
|
||||
go run ./cmd/discrawl --config "$CONFIG" report --readme "$BACKUP_REPO/README.md"
|
||||
if [ -n "${OPENAI_API_KEY:-}" ]; then
|
||||
npm install -g openclaw@latest
|
||||
openclaw onboard \
|
||||
--non-interactive \
|
||||
--mode local \
|
||||
--auth-choice openai-api-key \
|
||||
--secret-input-mode ref \
|
||||
--accept-risk \
|
||||
--skip-daemon \
|
||||
--skip-skills \
|
||||
--skip-search \
|
||||
--skip-health
|
||||
tmp_config="$(mktemp)"
|
||||
jq '.agents.defaults.model = "openai/gpt-5.2" | .agents.defaults.timeoutSeconds = 300 | .agents.defaults.llm.idleTimeoutSeconds = 240' \
|
||||
"$OPENCLAW_STATE_DIR/openclaw.json" > "$tmp_config"
|
||||
mv "$tmp_config" "$OPENCLAW_STATE_DIR/openclaw.json"
|
||||
scripts/discord-backup-field-notes.sh "$CONFIG" "$BACKUP_REPO"
|
||||
else
|
||||
echo "OPENAI_API_KEY not configured; skipping OpenClaw field notes"
|
||||
fi
|
||||
if git -C "$BACKUP_REPO" diff --quiet README.md; then
|
||||
echo "README already up to date"
|
||||
exit 0
|
||||
|
||||
52
.github/workflows/pages.yml
vendored
52
.github/workflows/pages.yml
vendored
@ -1,52 +0,0 @@
|
||||
name: Pages
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- "docs/**"
|
||||
- "scripts/build-docs-site.mjs"
|
||||
- ".github/workflows/pages.yml"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pages: write
|
||||
id-token: write
|
||||
|
||||
concurrency:
|
||||
group: pages
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
name: Deploy docs
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
environment:
|
||||
name: github-pages
|
||||
url: ${{ steps.deployment.outputs.page_url }}
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@v6
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v6
|
||||
with:
|
||||
node-version: 24
|
||||
|
||||
- name: Build site
|
||||
run: node scripts/build-docs-site.mjs
|
||||
|
||||
- name: Configure Pages
|
||||
uses: actions/configure-pages@v6
|
||||
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v5
|
||||
with:
|
||||
path: dist/docs-site
|
||||
|
||||
- name: Deploy
|
||||
id: deployment
|
||||
uses: actions/deploy-pages@v5
|
||||
16
.github/workflows/publish-discord-backup.yml
vendored
16
.github/workflows/publish-discord-backup.yml
vendored
@ -48,40 +48,30 @@ jobs:
|
||||
env:
|
||||
DISCORD_BOT_TOKEN: ${{ secrets.DISCORD_BOT_TOKEN }}
|
||||
DISCORD_BACKUP_TOKEN: ${{ secrets.DISCORD_BACKUP_TOKEN }}
|
||||
DISCRAWL_BACKUP_REPOSITORY: ${{ secrets.DISCRAWL_BACKUP_REPOSITORY }}
|
||||
DISCRAWL_GUILD_ID: ${{ secrets.DISCRAWL_GUILD_ID }}
|
||||
CONFIG: ${{ runner.temp }}/discrawl/config.toml
|
||||
DB: ${{ github.workspace }}/.discrawl-ci/discrawl.db
|
||||
BACKUP_REPO: ${{ runner.temp }}/discord-backup
|
||||
run: |
|
||||
if [ -z "${DISCRAWL_BACKUP_REPOSITORY:-}" ]; then
|
||||
echo "::notice title=Backup publish skipped::Configure DISCRAWL_BACKUP_REPOSITORY as owner/repo to enable archive publishing."
|
||||
exit 0
|
||||
fi
|
||||
if [ -z "${DISCORD_BOT_TOKEN:-}" ]; then
|
||||
echo "::error title=Missing secret::Configure DISCORD_BOT_TOKEN in the discrawl repo secrets."
|
||||
exit 1
|
||||
fi
|
||||
if [ -z "${DISCORD_BACKUP_TOKEN:-}" ]; then
|
||||
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to the backup repository."
|
||||
echo "::error title=Missing secret::Configure DISCORD_BACKUP_TOKEN with write access to openclaw/discord-backup."
|
||||
exit 1
|
||||
fi
|
||||
if [ -z "${DISCRAWL_GUILD_ID:-}" ]; then
|
||||
echo "::error title=Missing secret::Configure DISCRAWL_GUILD_ID with the Discord guild to publish."
|
||||
exit 1
|
||||
fi
|
||||
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/${DISCRAWL_BACKUP_REPOSITORY}.git"
|
||||
BACKUP_REMOTE="https://x-access-token:${DISCORD_BACKUP_TOKEN}@github.com/openclaw/discord-backup.git"
|
||||
mkdir -p "$(dirname "$CONFIG")"
|
||||
mkdir -p "$(dirname "$DB")"
|
||||
git clone "$BACKUP_REMOTE" "$BACKUP_REPO"
|
||||
go run ./cmd/discrawl --config "$CONFIG" init --db "$DB" --guild "$DISCRAWL_GUILD_ID"
|
||||
if [ -f "$BACKUP_REPO/manifest.json" ]; then
|
||||
if [ -s "$DB" ]; then
|
||||
echo "Restored Discord DB cache at $DB; skipping pre-sync snapshot import."
|
||||
else
|
||||
echo "Discord DB cache missing; importing latest published snapshot before latest-only sync."
|
||||
go run ./cmd/discrawl --config "$CONFIG" update --repo "$BACKUP_REPO" --remote "$BACKUP_REMOTE"
|
||||
fi
|
||||
go run ./cmd/discrawl --config "$CONFIG" update --repo "$BACKUP_REPO" --remote "$BACKUP_REMOTE"
|
||||
fi
|
||||
go run ./cmd/discrawl --config "$CONFIG" sync --guild "$DISCRAWL_GUILD_ID" --skip-members --latest-only
|
||||
git -C "$BACKUP_REPO" pull --ff-only origin main
|
||||
|
||||
61
.github/workflows/release.yml
vendored
61
.github/workflows/release.yml
vendored
@ -37,69 +37,10 @@ jobs:
|
||||
run: git checkout ${{ inputs.tag }}
|
||||
|
||||
- name: GoReleaser
|
||||
uses: goreleaser/goreleaser-action@v7.2.1
|
||||
uses: goreleaser/goreleaser-action@v7
|
||||
with:
|
||||
distribution: goreleaser
|
||||
version: "~> v2"
|
||||
args: release --clean --config /tmp/.goreleaser.yaml
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
update-homebrew-tap:
|
||||
runs-on: ubuntu-latest
|
||||
needs: goreleaser
|
||||
steps:
|
||||
- name: Resolve release tag
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "RELEASE_TAG=${{ inputs.tag }}" >> "$GITHUB_ENV"
|
||||
else
|
||||
echo "RELEASE_TAG=${{ github.ref_name }}" >> "$GITHUB_ENV"
|
||||
fi
|
||||
|
||||
- name: Dispatch tap formula update
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
|
||||
run: |
|
||||
if [ -z "$GH_TOKEN" ]; then
|
||||
echo "::error::Set HOMEBREW_TAP_TOKEN with workflow access to steipete/homebrew-tap"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
request_id="discrawl-${RELEASE_TAG}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
|
||||
expected_title="Update discrawl for ${RELEASE_TAG} (${request_id})"
|
||||
|
||||
gh workflow run update-formula.yml \
|
||||
--repo steipete/homebrew-tap \
|
||||
--ref main \
|
||||
-f formula=discrawl \
|
||||
-f tag="$RELEASE_TAG" \
|
||||
-f repository=openclaw/discrawl \
|
||||
-f artifact_template="{formula}_{version}_{target}.tar.gz" \
|
||||
-f request_id="$request_id"
|
||||
|
||||
run_id=""
|
||||
for _ in {1..30}; do
|
||||
run_id=$(gh run list \
|
||||
--repo steipete/homebrew-tap \
|
||||
--workflow update-formula.yml \
|
||||
--branch main \
|
||||
--event workflow_dispatch \
|
||||
--limit 20 \
|
||||
--json databaseId,displayTitle \
|
||||
--jq ".[] | select(.displayTitle == \"$expected_title\") | .databaseId" | head -n1)
|
||||
if [ -n "$run_id" ]; then
|
||||
break
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
|
||||
if [ -z "$run_id" ]; then
|
||||
echo "::error::Could not find tap workflow run with title: $expected_title"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
gh run watch "$run_id" \
|
||||
--repo steipete/homebrew-tap \
|
||||
--exit-status \
|
||||
--interval 10
|
||||
|
||||
63
.github/workflows/secret-scan.yml
vendored
63
.github/workflows/secret-scan.yml
vendored
@ -1,63 +0,0 @@
|
||||
name: "Security Gate: Secret Scanning"
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ["**"]
|
||||
pull_request:
|
||||
branches: [main, master]
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
trufflehog:
|
||||
name: Scan for Verified Secrets
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v6
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Resolve scan range
|
||||
id: scan_range
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
|
||||
PUSH_BASE_SHA: ${{ github.event.before }}
|
||||
PUSH_HEAD_SHA: ${{ github.sha }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
zero_sha="0000000000000000000000000000000000000000"
|
||||
|
||||
if [[ "$EVENT_NAME" == "pull_request" ]]; then
|
||||
base="$PR_BASE_SHA"
|
||||
head="$PR_HEAD_SHA"
|
||||
else
|
||||
base="$PUSH_BASE_SHA"
|
||||
head="$PUSH_HEAD_SHA"
|
||||
if [[ -z "$base" || "$base" == "$zero_sha" ]]; then
|
||||
base="origin/$DEFAULT_BRANCH"
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "base=$base" >> "$GITHUB_OUTPUT"
|
||||
echo "head=$head" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: TruffleHog OSS
|
||||
id: trufflehog
|
||||
uses: trufflesecurity/trufflehog@v3.95.2
|
||||
with:
|
||||
path: ./
|
||||
base: ${{ steps.scan_range.outputs.base }}
|
||||
head: ${{ steps.scan_range.outputs.head }}
|
||||
extra_args: --only-verified --debug
|
||||
|
||||
- name: Notify on failure
|
||||
if: steps.trufflehog.outcome == 'failure'
|
||||
run: |
|
||||
echo "::error::Verified secrets found. Rotate the credential before merging."
|
||||
exit 1
|
||||
86
.github/workflows/stale.yml
vendored
86
.github/workflows/stale.yml
vendored
@ -1,86 +0,0 @@
|
||||
name: Stale
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "25 4 * * *"
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Mark stale unassigned issues and pull requests
|
||||
uses: actions/stale@v10
|
||||
with:
|
||||
days-before-issue-stale: 14
|
||||
days-before-issue-close: 7
|
||||
days-before-pr-stale: 14
|
||||
days-before-pr-close: 7
|
||||
stale-issue-label: stale
|
||||
stale-pr-label: stale
|
||||
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
|
||||
exempt-pr-labels: maintainer,no-stale
|
||||
operations-per-run: 1000
|
||||
ascending: true
|
||||
exempt-all-assignees: true
|
||||
remove-stale-when-updated: true
|
||||
stale-issue-message: |
|
||||
This issue has been automatically marked as stale due to inactivity.
|
||||
Please add updated discrawl details or it will be closed.
|
||||
stale-pr-message: |
|
||||
This pull request has been automatically marked as stale due to inactivity.
|
||||
Please update it or it will be closed.
|
||||
close-issue-message: |
|
||||
Closing due to inactivity.
|
||||
If this still affects discrawl, open a new issue with current reproduction details.
|
||||
close-issue-reason: not_planned
|
||||
close-pr-message: |
|
||||
Closing due to inactivity.
|
||||
If this PR should be revived, reopen it with current context and validation.
|
||||
|
||||
- name: Mark stale assigned issues
|
||||
uses: actions/stale@v10
|
||||
with:
|
||||
days-before-issue-stale: 30
|
||||
days-before-issue-close: 10
|
||||
days-before-pr-stale: -1
|
||||
days-before-pr-close: -1
|
||||
stale-issue-label: stale
|
||||
exempt-issue-labels: enhancement,maintainer,pinned,security,no-stale
|
||||
operations-per-run: 1000
|
||||
ascending: true
|
||||
include-only-assigned: true
|
||||
remove-stale-when-updated: true
|
||||
stale-issue-message: |
|
||||
This assigned issue has been automatically marked as stale after 30 days of inactivity.
|
||||
Please add an update or it will be closed.
|
||||
close-issue-message: |
|
||||
Closing due to inactivity.
|
||||
If this still affects discrawl, reopen or file a new issue with current evidence.
|
||||
close-issue-reason: not_planned
|
||||
|
||||
- name: Mark stale assigned pull requests
|
||||
uses: actions/stale@v10
|
||||
with:
|
||||
days-before-issue-stale: -1
|
||||
days-before-issue-close: -1
|
||||
days-before-pr-stale: 27
|
||||
days-before-pr-close: 7
|
||||
stale-pr-label: stale
|
||||
exempt-pr-labels: maintainer,no-stale
|
||||
operations-per-run: 1000
|
||||
ascending: true
|
||||
include-only-assigned: true
|
||||
ignore-pr-updates: true
|
||||
remove-stale-when-updated: true
|
||||
stale-pr-message: |
|
||||
This assigned pull request has been automatically marked as stale after being open for 27 days.
|
||||
Please add an update or it will be closed.
|
||||
close-pr-message: |
|
||||
Closing due to inactivity.
|
||||
If this PR should be revived, reopen it with current context and validation.
|
||||
@ -2,33 +2,15 @@ version: "2"
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- asasalint
|
||||
- bidichk
|
||||
- bodyclose
|
||||
- canonicalheader
|
||||
- copyloopvar
|
||||
- dupword
|
||||
- durationcheck
|
||||
- errcheck
|
||||
- errchkjson
|
||||
- errorlint
|
||||
- exptostd
|
||||
- gocheckcompilerdirectives
|
||||
- gocritic
|
||||
- gomoddirectives
|
||||
- govet
|
||||
- intrange
|
||||
- ineffassign
|
||||
- makezero
|
||||
- misspell
|
||||
- modernize
|
||||
- nilerr
|
||||
- nilnesserr
|
||||
- noctx
|
||||
- nolintlint
|
||||
- nosprintfhostport
|
||||
- perfsprint
|
||||
- predeclared
|
||||
- rowserrcheck
|
||||
- sloglint
|
||||
- sqlclosecheck
|
||||
@ -36,8 +18,6 @@ linters:
|
||||
- testifylint
|
||||
- unconvert
|
||||
- unused
|
||||
- usetesting
|
||||
- usestdlibvars
|
||||
- wastedassign
|
||||
|
||||
formatters:
|
||||
|
||||
@ -12,7 +12,7 @@ builds:
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
ldflags:
|
||||
- -s -w -X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}
|
||||
- -s -w -X github.com/steipete/discrawl/internal/cli.version={{ .Version }}
|
||||
targets:
|
||||
- darwin_amd64
|
||||
- darwin_arm64
|
||||
|
||||
136
CHANGELOG.md
136
CHANGELOG.md
@ -1,133 +1,8 @@
|
||||
# Changelog
|
||||
|
||||
## 0.7.0 - 2026-05-08
|
||||
All notable changes to `discrawl` will be documented in this file.
|
||||
|
||||
### Changes
|
||||
|
||||
- Added `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared crawlkit pane browser.
|
||||
- Added crawlkit-backed `metadata --json`, `status --json`, and `doctor --json` control surfaces for launchers, automation, and CI checks.
|
||||
- Published the generated documentation site at `discrawl.sh`, including command pages, install/setup docs, configuration, security notes, guides, a contact page, and social cards.
|
||||
- Moved the Go module and release metadata to `github.com/openclaw/discrawl`.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Kept documented command-local search flags working after the query, such as `discrawl search "term" --limit 5`. Thanks @PrinceOfEgypt.
|
||||
- Made the terminal browser more useful and accurate: default guild scoping, newest-message startup, compact panes, selected-message detail panes, count-header sorting, local/remote status labels, right-click actions, Discord message URLs, row labels, direct-message pane labels, mention rendering, inline mention resolution, attachment details, and reply-context hydration without broad thread scans.
|
||||
- Kept read-only commands such as `search`, `messages`, and safe `sql` usable while `tail` or another writer holds the sync lock. Thanks @PrinceOfEgypt.
|
||||
- Kept `tui --help`, status, and terminal-browser reads safe for fresh or missing local databases without triggering Git snapshot auto-update.
|
||||
- Kept local-only snapshot rows filtered during shared archive imports and forwarded snapshot import progress through the crawlkit import path.
|
||||
- Made stale Git snapshot imports plan shard deltas from crawlkit file fingerprints or Git object identity, so routine shared-archive refreshes import changed message tail shards instead of rebuilding every table and FTS index.
|
||||
- Included progress percentages in message-sync logs.
|
||||
- Fixed GoReleaser version stamping after the module path move.
|
||||
|
||||
### Documentation
|
||||
|
||||
- Documented the crawlkit-backed config/status/control, snapshot, mirror, sync-state, output, and shared TUI surfaces now used on `main`.
|
||||
- Clarified that Discord bot sync, desktop wiretap parsing, DM privacy filters, schema ownership, FTS/ranking, embeddings, and analytics remain app-owned.
|
||||
- Aligned terminal-browser docs with the gitcrawl-style shared TUI model: channel/person/thread groups, message rows, detail/thread panes, sorting, mouse selection, right-click actions, and local/remote status chrome.
|
||||
- Refreshed the repo-local `discrawl` agent skill for local Discord archive, freshness, query, boundary, TUI, verification, and read-only SQL workflows.
|
||||
|
||||
### Maintenance
|
||||
|
||||
- Migrated runtime paths, SQLite opening, archive mirror/export/import helpers, output/status wiring, and TUI plumbing onto the shared `crawlkit` infrastructure.
|
||||
- Moved reusable embedding providers and vector helpers onto `crawlkit` while keeping Discrawl-owned storage, FTS, queueing, and privacy filters local.
|
||||
- Updated crawlkit through `v0.4.1`, switched imports to `github.com/openclaw/crawlkit`, and added CI smoke coverage for the crawlkit control surface and merge behavior.
|
||||
- Added CodeQL, verified secret scanning, protected automation owners, stale issue automation, `.editorconfig`, and `.gitattributes`.
|
||||
- Added release workflow automation that dispatches the Homebrew tap formula update after GoReleaser publishes a tag.
|
||||
|
||||
## 0.6.6 - 2026-05-05
|
||||
|
||||
### Fixes
|
||||
|
||||
- `wiretap` now uses a fast default path for Discord Chromium cache imports: it scans cheap context files plus route-bearing HTTP cache entries, checkpoints file progress in batches, and leaves exhaustive historical cache archaeology behind `--full-cache` / `desktop.full_cache`.
|
||||
|
||||
## 0.6.5 - 2026-05-03
|
||||
|
||||
### Fixes
|
||||
|
||||
- Scheduled Discord backup publishing now skips redundant pre-sync snapshot imports when the workflow DB cache is warm, keeping fresh Git snapshots from getting delayed by a full archive reimport.
|
||||
- `discrawl sync` now keeps Git snapshot refreshes explicit by default; use `--update=auto` or `--update=force` when you want a sync run to pull/import the shared snapshot before live Discord or desktop-cache deltas.
|
||||
- Snapshot imports now emit phase/table/file progress and keep the sync lock file updated with the active phase, making long update/import runs diagnosable instead of looking hung.
|
||||
- Recent-message scans are backed by a plain `messages(created_at, id)` index so archive freshness and short-window analysis queries avoid full-table scans.
|
||||
|
||||
## 0.6.4 - 2026-05-03
|
||||
|
||||
### Fixes
|
||||
|
||||
- `discrawl` now handles SIGINT/SIGTERM by canceling active sync/import contexts so large SQLite and FTS writes can roll back and close cleanly instead of being terminated mid-transaction.
|
||||
|
||||
### Maintenance
|
||||
|
||||
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
|
||||
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
|
||||
|
||||
## 0.6.3 - 2026-05-01
|
||||
|
||||
### Fixes
|
||||
|
||||
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
|
||||
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.
|
||||
|
||||
### Maintenance
|
||||
|
||||
- Tightened Go linting with additional golangci-lint checks for compiler directives, host/port formatting, predeclared identifiers, missing command contexts, and related code-quality regressions.
|
||||
- Updated test subprocess helpers to use test-scoped contexts and cleaned up assertions so the stricter CI suite stays green.
|
||||
|
||||
## 0.6.2 - 2026-05-01
|
||||
|
||||
### Changes
|
||||
|
||||
- Added `discrawl digest` for per-channel activity summaries with messages, replies, active authors, top posters, and top mentions. Thanks @mvanhorn.
|
||||
- Added `discrawl analytics quiet` and `discrawl analytics trends` for finding silent top-level channels and week-over-week channel volume. Thanks @mvanhorn.
|
||||
|
||||
### Fixes
|
||||
|
||||
- `discrawl digest` now reports reply counts as `replies` instead of mislabeling reply roots as Discord threads.
|
||||
- `discrawl sync` now serializes concurrent runs with a local lock, preventing two refreshes from writing the archive at the same time.
|
||||
- Git snapshot imports now keep SQLite crash recovery enabled and share the same archive lock as sync, update, tail, wiretap, embed, and auto-update reads so interrupted imports are less likely to corrupt the live database.
|
||||
- Git snapshot imports now recover from corrupt local FTS tables by dropping and rebuilding search indexes, and repair missing guild IDs from channel metadata so shared archive reports stay fresh.
|
||||
- Channel-history sync now falls back to the channel guild when Discord omits `message.guild_id`, keeping messages, attachments, mentions, and FTS rows correctly scoped.
|
||||
|
||||
## 0.6.1 - 2026-04-28
|
||||
|
||||
### Fixes
|
||||
|
||||
- Repeated `sync --source wiretap` runs now skip unchanged Discord Desktop cache files and report unchanged file counts, making steady-state local-cache refreshes much faster.
|
||||
- `sync --full --skip-members` now also skips member crawls when resuming incomplete stored channels, so backfills do not unexpectedly refresh the full guild member list.
|
||||
|
||||
### Maintenance
|
||||
|
||||
- Refactored sync-mode handling so routine latest syncs, `--all-channels`, `--full`, and member-refresh decisions share clearer internal paths with regression coverage.
|
||||
- Refreshed Go module dependencies and CI tool/action pins, including staticcheck, gofumpt, gosec, govulncheck, gitleaks, setup-node, and GoReleaser.
|
||||
- Hardened report README writes and Discord Desktop cache reads with root-scoped filesystem access to satisfy the latest gosec checks.
|
||||
|
||||
## 0.6.0 - 2026-04-24
|
||||
|
||||
### Changes
|
||||
|
||||
- `dms` now lists local wiretap DM conversations and can read or search one DM thread with `--with`, `--last`, and `--search`, so common DM queries no longer require raw SQL.
|
||||
- `search --dm` and `messages --dm` now target the local-only `@me` archive directly and skip Git snapshot auto-update, since DMs are never imported from the shared mirror.
|
||||
- Go module dependencies and lint rules were refreshed for the current Go toolchain, including stricter JSON marshal checks and modern simplification rules.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Wiretap now infers fallback DM channel names from cached Discord user/profile data, so channels discovered only from route/message cache entries resolve to names like `Vincent K` instead of `channel-*`.
|
||||
- Wiretap message output now preserves sanitized author labels in stored metadata, improving `dms` and `messages` output without storing raw desktop cache payloads.
|
||||
|
||||
### Tests
|
||||
|
||||
- Added regression coverage for DM channel-name inference from cached profile data when Discord Desktop cache lacks explicit channel recipient metadata.
|
||||
- Added coverage for local DM conversation listing/filtering, DM cleanup paths, share import/export helpers, CLI DM windows, and Discord Desktop import helper edge cases.
|
||||
- CI now runs uncached test and race suites, checks `go mod tidy`, and performs a snapshot GoReleaser build before release tags.
|
||||
|
||||
## 0.5.1 - 2026-04-24
|
||||
|
||||
### Fixes
|
||||
|
||||
- Git snapshot export/import now keeps wiretap DMs strictly local: `@me` rows, wiretap sync state, and DM vectors are excluded from published snapshots while existing local DM rows are preserved on import.
|
||||
- Publishing without `--with-embeddings` now omits old embedding manifests instead of carrying forward a stale vector bundle.
|
||||
|
||||
## 0.5.0 - 2026-04-24
|
||||
## Unreleased
|
||||
|
||||
### Changes
|
||||
|
||||
@ -172,10 +47,11 @@
|
||||
- `sync --all` now bypasses `default_guild_id` so one run can fan out across every discovered guild without clearing the single-guild default first
|
||||
- `sync --full` no longer aborts when forum thread discovery hits Discord `403 Missing Access`; inaccessible channels are skipped and marked unavailable while accessible channels continue syncing
|
||||
- startup now validates and stamps the SQLite schema version via `PRAGMA user_version`, and fails fast if the local DB schema is newer than the running binary
|
||||
- `init --from-openclaw` now supports `--account`, and OpenClaw token fields can use `${ENV_VAR}` placeholders
|
||||
- git-backed archive sharing can now export/import compressed JSONL snapshots with manifests, subscribe to a Git repo as the data source, and run in git-only mode without Discord credentials
|
||||
- `messages`, `search`, and reports can automatically refresh stale git-backed data, preferring the Git snapshot before falling back to live Discord when both sources are configured
|
||||
- the Discord backup publisher workflow now syncs latest messages, publishes the archive to a private GitHub repo, serializes concurrent runs, validates required secrets, and skips the member crawl for faster updates
|
||||
- the backup report workflow now updates README activity stats from the backup action and keeps those queries bounded with process timeouts
|
||||
- the backup report workflow now updates README activity stats, supports OpenClaw-generated field notes, runs the field-note logic from the backup action, and keeps those queries bounded with process timeouts
|
||||
- `sync --latest-only` adds a lightweight refresh path for checking recent Discord messages without doing a full historical crawl
|
||||
- repository imports now skip expensive rebuilds when the snapshot manifest is already current, and GitHub Actions persist the warmed SQLite database across runs
|
||||
- the Docker git-source smoke test now verifies that a fresh install can subscribe to a repository-only archive and query messages, SQL, and reports
|
||||
@ -199,12 +75,12 @@
|
||||
- multi-guild Discord crawler with single-guild default UX
|
||||
- local SQLite archive with FTS5 search
|
||||
- commands: `init`, `sync`, `tail`, `search`, `messages`, `mentions`, `sql`, `members`, `channels`, `status`, `doctor`
|
||||
- env-based bot token discovery
|
||||
- OpenClaw config reuse plus env-based bot token discovery
|
||||
- resumable full-history sync, live gateway tailing, repair sync loop, targeted channel sync
|
||||
- attachment-text indexing for small text-like uploads
|
||||
- structured user and role mention indexing/querying
|
||||
- empty-message filtering based on real searchable/displayable content instead of raw body only
|
||||
- CI with lint, tests, secret scanning, and coverage enforcement
|
||||
- CI with lint, tests, secret scanning, and `80%+` coverage enforcement
|
||||
- release plumbing via GoReleaser, GitHub Actions, and Homebrew tap packaging
|
||||
- sync correctness fixes for empty channels, inaccessible channels, unknown channels, and large-channel resume behavior
|
||||
- SQLite/FTS performance fixes for backfill throughput and lower write amplification
|
||||
|
||||
248
README.md
248
README.md
@ -1,15 +1,8 @@
|
||||
# discrawl 🛰️ — Mirror Discord into SQLite; search server history locally
|
||||
|
||||
`discrawl` mirrors Discord guild data into local SQLite so you can search, inspect, and query server history without depending on Discord search. It can also import classifiable Discord Desktop cache messages for local DM recovery/search without using a user token. Teams can publish the guild archive as a private Git snapshot repo, so readers get fresh org memory without Discord bot credentials.
|
||||
`discrawl` mirrors Discord guild data into local SQLite so you can search, inspect, and query server history without depending on Discord search. It can also import classifiable Discord Desktop cache messages for DM recovery/search without using a user token. Teams can publish that archive as a private Git snapshot repo, so readers get fresh org memory without Discord bot credentials.
|
||||
|
||||
There are two local archive sources:
|
||||
|
||||
- Discord bot API sync for guilds, channels, members, threads, and message history the configured bot can access
|
||||
- Discord Desktop cache import for local, classifiable cached messages, including proven local-only DMs under `@me`
|
||||
|
||||
Desktop wiretap mode reads local cache artifacts only. It does not extract credentials, use user tokens, call the Discord API as your user, or run a selfbot.
|
||||
|
||||
Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror.
|
||||
Live guild sync uses real bot tokens. Desktop wiretap mode reads local cache artifacts only; it does not extract credentials or run a selfbot. Data stays local unless you explicitly publish a Git-backed snapshot.
|
||||
|
||||
## What It Does
|
||||
|
||||
@ -22,9 +15,6 @@ Wiretap DMs stay local and are never exported to the Git-backed snapshot mirror.
|
||||
- tails Gateway events for live updates, with periodic repair syncs
|
||||
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
|
||||
- publishes and imports private Git-backed archive snapshots for org-wide read access
|
||||
- browses stored messages and local DMs in a terminal archive UI
|
||||
- exposes `metadata --json`, `status --json`, and `doctor --json` for local
|
||||
launchers, automation, and CI
|
||||
- supports Git-only read mode with no Discord credentials on reader machines
|
||||
- generates backup README activity reports, with optional AI-written field notes
|
||||
- exposes read-only SQL for ad hoc analysis
|
||||
@ -62,12 +52,12 @@ Without those intents/permissions, `sync`, `tail`, member snapshots, or message
|
||||
|
||||
Token resolution:
|
||||
|
||||
1. `DISCORD_BOT_TOKEN` or the configured `discord.token_env`
|
||||
2. OS keyring item `discrawl` / `discord_bot_token`, or the configured keyring service/account
|
||||
1. OpenClaw config, if `discord.token_source` is not `env`
|
||||
2. `DISCORD_BOT_TOKEN` or the configured `discord.token_env`
|
||||
|
||||
`discrawl` accepts either raw token text or a value prefixed with `Bot `. It normalizes that automatically.
|
||||
|
||||
Fastest path:
|
||||
Fastest env-only path:
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="your-bot-token"
|
||||
@ -83,20 +73,7 @@ export DISCORD_BOT_TOKEN="your-bot-token"
|
||||
|
||||
Then reload your shell before running `discrawl`.
|
||||
|
||||
If you prefer the OS keyring, keep the token out of config and store it in the default keyring item:
|
||||
|
||||
```bash
|
||||
# macOS Keychain
|
||||
security add-generic-password -U -s discrawl -a discord_bot_token -w "$DISCORD_BOT_TOKEN"
|
||||
|
||||
# Linux Secret Service / libsecret
|
||||
printf %s "$DISCORD_BOT_TOKEN" | secret-tool store --label="discrawl Discord bot token" service discrawl username discord_bot_token
|
||||
|
||||
# Windows Credential Manager
|
||||
cmdkey /generic:discrawl:discord_bot_token /user:discord_bot_token /pass:%DISCORD_BOT_TOKEN%
|
||||
```
|
||||
|
||||
Set `discord.token_source = "keyring"` if you want to require keyring lookup instead of env-first fallback.
|
||||
If you already use OpenClaw, `discrawl` can reuse the Discord token from `~/.openclaw/openclaw.json` by default.
|
||||
|
||||
Default runtime paths:
|
||||
|
||||
@ -117,7 +94,7 @@ discrawl --version
|
||||
Build from source:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/openclaw/discrawl.git
|
||||
git clone https://github.com/steipete/discrawl.git
|
||||
cd discrawl
|
||||
go build -o bin/discrawl ./cmd/discrawl
|
||||
./bin/discrawl --version
|
||||
@ -127,24 +104,38 @@ Examples below assume `discrawl` is on `PATH`. If you built from source without
|
||||
|
||||
## Quick Start
|
||||
|
||||
Configure a Discord bot token and refresh both bot-visible guild data and local desktop cache data:
|
||||
Reuse an existing OpenClaw Discord bot config:
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="..."
|
||||
discrawl init
|
||||
discrawl init --from-openclaw ~/.openclaw/openclaw.json
|
||||
discrawl doctor
|
||||
discrawl sync --full
|
||||
discrawl sync
|
||||
discrawl search "panic: nil pointer"
|
||||
discrawl tail
|
||||
discrawl wiretap
|
||||
```
|
||||
|
||||
Use `discrawl sync --source wiretap` when you only want the local Discord Desktop cache import and do not want bot-token API sync.
|
||||
Multi-account OpenClaw setup:
|
||||
|
||||
```bash
|
||||
discrawl init --from-openclaw ~/.openclaw/openclaw.json --account atlas
|
||||
```
|
||||
|
||||
Env-only setup:
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="..."
|
||||
discrawl doctor
|
||||
discrawl init
|
||||
discrawl sync --full
|
||||
discrawl sync
|
||||
```
|
||||
|
||||
Git-only reader setup:
|
||||
|
||||
```bash
|
||||
discrawl subscribe https://github.com/example/discord-archive.git
|
||||
discrawl subscribe https://github.com/openclaw/discord-backup.git
|
||||
discrawl search "launch checklist"
|
||||
discrawl messages --channel general --hours 24
|
||||
```
|
||||
@ -162,94 +153,55 @@ discrawl messages --channel general --hours 24
|
||||
|
||||
## Commands
|
||||
|
||||
### `tui`
|
||||
|
||||
Opens the local terminal archive browser for stored messages.
|
||||
|
||||
```bash
|
||||
discrawl tui
|
||||
discrawl tui --guild 123456789012345678 --channel general
|
||||
discrawl tui --dm
|
||||
discrawl --json tui --limit 50
|
||||
```
|
||||
|
||||
The terminal browser uses the shared crawlkit explorer. The left pane groups
|
||||
channels, people, or threads; the middle pane lists messages; the right pane
|
||||
shows the selected message, surrounding conversation, and thread detail. Mouse
|
||||
selection, right-click actions, sortable headers, and the local/remote footer
|
||||
follow the same interaction model as `gitcrawl tui`. See
|
||||
[`docs/commands/tui.md`](docs/commands/tui.md) for flags and read-only/DM scope
|
||||
notes.
|
||||
|
||||
### `init`
|
||||
|
||||
Creates the local config and discovers accessible guilds.
|
||||
|
||||
```bash
|
||||
discrawl init
|
||||
discrawl init --from-openclaw ~/.openclaw/openclaw.json
|
||||
discrawl init --from-openclaw ~/.openclaw/openclaw.json --account atlas
|
||||
discrawl init --guild 123456789012345678
|
||||
discrawl init --db ~/data/discrawl.db
|
||||
```
|
||||
|
||||
When OpenClaw config tokens use `${ENV_VAR}` placeholders, `init` and `doctor` resolve them before auth.
|
||||
|
||||
### `sync`
|
||||
|
||||
Refreshes SQLite from one or both archive sources.
|
||||
|
||||
By default, `sync` runs both live/local sources and does not import the Git snapshot first:
|
||||
|
||||
- Discord bot-token sync for bot-visible guild data
|
||||
- local Discord Desktop cache import for classifiable cached messages and proven DMs
|
||||
|
||||
Use `discrawl update` when you want to pull/import the shared Git snapshot. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` to import only when stale or `--update=force` to pull/import before syncing. `--no-update` is accepted as an explicit no-op alias for the default.
|
||||
|
||||
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
|
||||
Refreshes guild state into SQLite. Run one explicit `--full` pass when you want a complete historical archive; use plain `sync` afterward for frequent latest-message refreshes.
|
||||
|
||||
```bash
|
||||
discrawl sync
|
||||
discrawl sync --update=auto
|
||||
discrawl sync --update=force
|
||||
discrawl sync --no-update
|
||||
discrawl sync --full
|
||||
discrawl sync --full --all
|
||||
discrawl sync --guild 123456789012345678
|
||||
discrawl sync --guilds 123,456 --concurrency 8
|
||||
discrawl sync --source both # default: bot API + desktop cache
|
||||
discrawl sync --source discord # bot API only; aliases: key, bot, api
|
||||
discrawl sync --source wiretap # desktop cache only; aliases: desktop, cache
|
||||
discrawl sync --source both
|
||||
discrawl sync --source discord
|
||||
discrawl sync --source wiretap
|
||||
discrawl sync --guild 123456789012345678 --all-channels
|
||||
discrawl sync --channels 111,222 --since 2026-03-01T00:00:00Z
|
||||
```
|
||||
|
||||
Sync sources:
|
||||
|
||||
| Source | Reads from | Stores |
|
||||
| --- | --- | --- |
|
||||
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
|
||||
| `discord` / `key` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
|
||||
| `wiretap` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
|
||||
|
||||
Sync modes control the Discord bot API side of a run. When `wiretap` is selected, the desktop cache import runs once alongside the chosen bot sync mode.
|
||||
|
||||
Bot sync modes:
|
||||
Sync modes:
|
||||
|
||||
| Command | Use when | Behavior |
|
||||
| --- | --- | --- |
|
||||
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, and only fetches new messages for channels with a stored latest cursor |
|
||||
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, then runs the routine live refresh |
|
||||
| `discrawl sync` | routine refresh | imports any stale Git snapshot first, skips member refreshes, checks live top-level channels plus active threads, and only fetches new messages for channels with a stored latest cursor |
|
||||
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
|
||||
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
|
||||
|
||||
`sync` already uses parallel channel workers for bot API message crawling.
|
||||
`--concurrency` overrides the default, and the default is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`.
|
||||
`sync` already uses parallel channel workers. `--concurrency` overrides the default, and the default is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`.
|
||||
`--source` selects what gets refreshed: `both` (default), `discord`/`key` for bot-token API sync only, or `wiretap` for local Discord Desktop cache import only.
|
||||
`--all` ignores `default_guild_id` and fans out across every discovered guild the bot can access.
|
||||
`--skip-members` refreshes guild/channel/message data without crawling the full member list, which is useful for frequent Git snapshot publishers that only need latest messages.
|
||||
`--latest-only` is still accepted for explicit latest-only runs; it is now the default for untargeted `sync`. Use `--all-channels` to opt out of the fast default without doing a full historical crawl.
|
||||
When `--channels` includes a forum channel id, `discrawl` expands that forum's threads and syncs their messages as part of the targeted run.
|
||||
`--since` limits initial history/bootstrap and full-history backfill to messages at or after the given RFC3339 timestamp. It does not mark older history as complete, so a later `sync --full` without `--since` can continue the backfill.
|
||||
Long runs now emit periodic progress logs to stderr so large backfills and Git snapshot imports do not look hung.
|
||||
Long runs now emit periodic progress logs to stderr so large backfills do not look hung.
|
||||
If in-flight channels stop completing for a while, `discrawl` now emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters, and every run ends with a `message sync finished` summary.
|
||||
Each channel crawl also has a bounded runtime budget, so a pathological channel is deferred and retried on the next sync instead of pinning a worker forever.
|
||||
Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl, and marker cleanup is best-effort so one missing local sync-state row cannot crash the run.
|
||||
Full sync member refresh is best-effort: when the caller does not supply a deadline, it currently gives up after five minutes, so message sync completion is not held hostage by a slow guild member crawl.
|
||||
When the archive is already complete, `sync --full` now reuses the stored backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
||||
If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
||||
@ -266,30 +218,21 @@ discrawl tail --repair-every 30m
|
||||
|
||||
### `wiretap`
|
||||
|
||||
Imports classifiable Discord Desktop message payloads into the same local SQLite archive.
|
||||
|
||||
This is the path for searchable DMs because bot tokens cannot read personal direct messages.
|
||||
|
||||
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
|
||||
Imports classifiable Discord Desktop message payloads into the same local SQLite archive. This is the path for searchable DMs because bot tokens cannot read personal direct messages.
|
||||
|
||||
```bash
|
||||
discrawl wiretap
|
||||
discrawl wiretap --path "$HOME/Library/Application Support/discord"
|
||||
discrawl wiretap --dry-run
|
||||
discrawl wiretap --full-cache
|
||||
discrawl wiretap --watch-every 2m
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
|
||||
- stores only classifiable cache messages in the normal `guilds` / `channels` / `messages` tables
|
||||
- stores proven DMs under the synthetic guild id `@me`
|
||||
- keeps `@me` rows local-only: `publish`, Git snapshot import/export, and optional embedding snapshot export exclude DM guilds, channels, messages, events, attachments, mentions, wiretap sync state, and vectors for DM messages
|
||||
- preserves existing local `@me` guilds, channels, messages, and attachments when importing a Git snapshot, so a shared guild mirror refresh does not wipe local wiretap DM search
|
||||
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
|
||||
- imports what Discord Desktop has cached locally, not complete live DM history
|
||||
- scans local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON, plus route-bearing Chromium HTTP cache entries by default
|
||||
- use `--full-cache` or `desktop.full_cache = true` for an exhaustive Chromium cache import when you want deeper historical guild-cache archaeology and can accept a slower run
|
||||
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs
|
||||
- scans local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON
|
||||
- does not extract, store, or print Discord auth tokens
|
||||
- `--max-file-bytes` skips unusually large files; default is 64 MiB
|
||||
|
||||
@ -303,7 +246,6 @@ discrawl search --mode fts "panic: nil pointer"
|
||||
discrawl search --mode semantic "missing launch checklist"
|
||||
discrawl search --mode hybrid "database timeout"
|
||||
discrawl search --guild 123456789012345678 "payment failed"
|
||||
discrawl search --dm "launch checklist"
|
||||
discrawl search --channel billing --author steipete --limit 50 "invoice"
|
||||
discrawl search --include-empty "GitHub"
|
||||
discrawl --json search "websocket closed"
|
||||
@ -317,8 +259,6 @@ Modes:
|
||||
- `semantic` embeds the query, searches locally stored message vectors, and returns a clear error if embeddings are disabled or no compatible vectors exist.
|
||||
- `hybrid` runs FTS and semantic search, deduplicates by message id, and falls back to FTS when semantic search is unavailable.
|
||||
|
||||
FTS uses SQLite FTS5 with the default `unicode61` tokenizer. User query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators. Punctuation still follows FTS5 tokenization rules.
|
||||
|
||||
Semantic and hybrid search require `[search.embeddings]` plus local `message_embeddings` rows for the configured provider, model, and input version. Run `discrawl sync --with-embeddings` to enqueue changed messages, then `discrawl embed` to generate vectors. The input version is currently `message_normalized_v1`, so vectors are tied to normalized message text rather than raw Discord payloads.
|
||||
|
||||
### `messages`
|
||||
@ -331,7 +271,6 @@ discrawl messages --channel maintainers --hours 6 --all
|
||||
discrawl messages --channel "#maintainers" --since 2026-03-01T00:00:00Z
|
||||
discrawl messages --channel 1456744319972282449 --author steipete --limit 50
|
||||
discrawl messages --channel maintainers --last 100 --sync
|
||||
discrawl messages --dm --channel Molty --last 20
|
||||
discrawl messages --channel maintainers --days 7 --all --include-empty
|
||||
discrawl --json messages --channel maintainers --days 3
|
||||
```
|
||||
@ -346,21 +285,6 @@ Notes:
|
||||
- `--sync` runs a blocking pre-query sync for the matching channel or guild scope before reading the local DB
|
||||
- rows with no displayable/searchable content are skipped by default; `--include-empty` opts back in
|
||||
- at least one filter is required
|
||||
- `--dm` is shorthand for `--guild @me`, so DM searches and message slices do not need raw SQL
|
||||
|
||||
### `dms`
|
||||
|
||||
Lists local wiretap DM conversations or reads one DM thread.
|
||||
|
||||
```bash
|
||||
discrawl dms
|
||||
discrawl dms --with Molty --last 20
|
||||
discrawl dms --with 1456464433768300635 --all
|
||||
discrawl dms --search "launch checklist"
|
||||
discrawl dms --with Molty --search "invoice"
|
||||
```
|
||||
|
||||
`discrawl dms` shows one row per local DM channel with message count, author count, and first/last cached message times. Passing `--with` switches to message output for that DM conversation unless `--list` is also set. `--search` searches only local DM messages. This is a convenience layer over the local-only synthetic guild id `@me`; it skips Git snapshot auto-update because DMs are never imported from the shared mirror, and it still only sees Discord Desktop cache data imported by `wiretap`.
|
||||
|
||||
### `mentions`
|
||||
|
||||
@ -464,14 +388,14 @@ discrawl status
|
||||
Publisher:
|
||||
|
||||
```bash
|
||||
discrawl publish --remote https://github.com/example/discord-archive.git --push
|
||||
discrawl publish --remote https://github.com/openclaw/discord-backup.git --push
|
||||
discrawl publish --readme path/to/discord-backup/README.md --push
|
||||
```
|
||||
|
||||
Subscriber:
|
||||
|
||||
```bash
|
||||
discrawl subscribe https://github.com/example/discord-archive.git
|
||||
discrawl subscribe https://github.com/openclaw/discord-backup.git
|
||||
discrawl search "launch checklist"
|
||||
discrawl messages --channel general --hours 24
|
||||
```
|
||||
@ -481,25 +405,25 @@ discrawl messages --channel general --hours 24
|
||||
Configure freshness:
|
||||
|
||||
```bash
|
||||
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --stale-after 15m https://github.com/openclaw/discord-backup.git
|
||||
discrawl subscribe --no-auto-update https://github.com/openclaw/discord-backup.git
|
||||
```
|
||||
|
||||
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). Imports are planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests, so routine updates normally read only changed tail shards and preserve local FTS rows instead of rebuilding the whole archive. `discrawl update` forces the same pull/import step manually. `discrawl sync` does not auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
||||
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). `discrawl update` forces the same pull/import step manually.
|
||||
|
||||
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync --update=auto` and `discrawl messages --sync` import the Git snapshot first, usually as a changed-shard delta, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
|
||||
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync` and `discrawl messages --sync` import the Git snapshot first, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
|
||||
|
||||
Git snapshots publish non-DM archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
|
||||
Git snapshots publish archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
|
||||
|
||||
Generated vectors can be backed up explicitly:
|
||||
|
||||
```bash
|
||||
discrawl publish --with-embeddings --push
|
||||
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --with-embeddings https://github.com/openclaw/discord-backup.git
|
||||
discrawl update --with-embeddings
|
||||
```
|
||||
|
||||
`--with-embeddings` exports stored `message_embeddings` rows for the configured `[search.embeddings]` provider/model plus the current input version. The snapshot stores those vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Only vectors for non-DM messages are exported. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors into semantic search. `embedding_jobs` is never exported; subscribers that want fresh local vectors can run `discrawl embed --rebuild` to create their own queue and vectors. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
|
||||
`--with-embeddings` exports stored `message_embeddings` rows for the configured `[search.embeddings]` provider/model plus the current input version. The snapshot stores those vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors into semantic search. `embedding_jobs` is never exported; subscribers that want fresh local vectors can run `discrawl embed --rebuild` to create their own queue and vectors.
|
||||
|
||||
The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`:
|
||||
|
||||
@ -518,47 +442,16 @@ discrawl report --readme path/to/discord-backup/README.md
|
||||
|
||||
Every scheduled snapshot publish updates deterministic README stats: latest update time, latest archived message, archive totals, and day/week/month activity.
|
||||
|
||||
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
|
||||
The backup README field notes are intentionally generated by a separate daily workflow, not by `discrawl report`, so model latency or quota cannot block the 15-minute data publish path. `.github/workflows/discord-backup-report.yml` installs `openclaw@latest`, runs `openclaw agent --local` with OpenAI, and inserts a separate `discrawl-field-notes` block with:
|
||||
|
||||
### `digest`
|
||||
- what people seem to love
|
||||
- what people complain about
|
||||
- complaint topics correlated with recent GitHub issue and PR clusters
|
||||
- the likely best PR to watch
|
||||
|
||||
Summarizes per-channel activity for a lookback window.
|
||||
Configure `OPENAI_API_KEY` in the discrawl repo secrets to enable agent-written field notes. `DISCORD_BACKUP_TOKEN` still needs write access to `openclaw/discord-backup`. If the GitHub repo used for issue/PR correlation is private, also set `DISCORD_FIELD_NOTES_GITHUB_TOKEN` with read access to that repo.
|
||||
|
||||
```bash
|
||||
discrawl digest
|
||||
discrawl digest --since 30d
|
||||
discrawl digest --guild 123456789012345678
|
||||
discrawl digest --channel general
|
||||
discrawl --json digest --since 7d --top-n 5
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `--since` accepts Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
|
||||
- `--guild` scopes to one guild; when omitted, `default_guild_id` is used if configured
|
||||
- `--channel` accepts a channel id or exact channel name
|
||||
- `--top-n` controls how many top posters and mention targets are shown per channel
|
||||
|
||||
### `analytics`
|
||||
|
||||
Groups activity-style queries under one namespace.
|
||||
|
||||
```bash
|
||||
discrawl analytics
|
||||
discrawl analytics quiet --since 30d
|
||||
discrawl analytics quiet --guild 123456789012345678
|
||||
discrawl analytics trends --weeks 8
|
||||
discrawl analytics trends --weeks 12 --channel general
|
||||
discrawl --json analytics quiet --since 60d
|
||||
discrawl --json analytics trends --weeks 4
|
||||
```
|
||||
|
||||
Notes:
|
||||
|
||||
- `analytics quiet` shows top-level text/announcement channels with no messages in the lookback window, including never-active channels
|
||||
- `analytics quiet --guild` scopes the report to one guild; when omitted, `default_guild_id` is used if configured
|
||||
- `analytics trends` shows Monday-start UTC weekly message counts per message-capable channel
|
||||
- `analytics trends --channel` accepts a channel id or exact channel name
|
||||
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, `discrawl update` compares the cached DB's last imported snapshot timestamp with `manifest.json` and skips the full sharded import when they match. Cache misses and newer backup manifests still take the normal pull/import path.
|
||||
|
||||
### `doctor`
|
||||
|
||||
@ -583,23 +476,17 @@ cache_dir = "~/.discrawl/cache"
|
||||
log_dir = "~/.discrawl/logs"
|
||||
|
||||
[discord]
|
||||
token_source = "env" # use "none" for Git-only read access
|
||||
token_source = "openclaw" # use "none" for Git-only read access
|
||||
openclaw_config = "~/.openclaw/openclaw.json"
|
||||
account = "default"
|
||||
token_env = "DISCORD_BOT_TOKEN"
|
||||
token_keyring_service = "discrawl"
|
||||
token_keyring_account = "discord_bot_token"
|
||||
|
||||
[sync]
|
||||
source = "both" # use "discord" for bot-only sync or "wiretap" for desktop-cache-only import
|
||||
concurrency = 16
|
||||
repair_every = "6h"
|
||||
full_history = true
|
||||
attachment_text = true
|
||||
|
||||
[desktop]
|
||||
path = "~/.config/discord" # macOS default: "~/Library/Application Support/discord"
|
||||
max_file_bytes = 67108864
|
||||
full_cache = false
|
||||
|
||||
[search]
|
||||
default_mode = "fts"
|
||||
|
||||
@ -624,8 +511,7 @@ Config override rules:
|
||||
|
||||
- `--config` beats everything
|
||||
- `DISCRAWL_CONFIG` overrides the default config path
|
||||
- `discord.token_source = "none"` disables live Discord access for Git-only readers
|
||||
- `discord.token_source = "keyring"` skips env lookup and reads only the configured OS keyring item
|
||||
- `discord.token_source = "env"` forces env-only token lookup
|
||||
- `DISCRAWL_NO_AUTO_UPDATE=1` disables Git snapshot auto-update for read commands in one process, useful for report jobs that already imported a fresh backup.
|
||||
|
||||
## Embeddings
|
||||
@ -679,10 +565,6 @@ With remote providers, message text is sent during `discrawl embed`, and search
|
||||
- FTS index rows
|
||||
- optional local embedding queue metadata and vectors
|
||||
|
||||
Messages imported from Discord Desktop use the same message, attachment, mention, and FTS paths as bot-synced messages.
|
||||
|
||||
Proven DMs use `@me` as their guild id. Unclassifiable desktop-cache payloads are skipped instead of being stored as unknown synthetic data.
|
||||
|
||||
SQLite schema migrations are versioned with `PRAGMA user_version`. Startup now fails fast when the local DB schema is newer than the running binary supports.
|
||||
|
||||
Attachment binaries are not stored in SQLite.
|
||||
@ -693,7 +575,6 @@ Set `sync.attachment_text = false` if you want to keep attachment metadata and f
|
||||
|
||||
- do not commit bot tokens or API keys
|
||||
- default config lives in your home directory, not inside the repo
|
||||
- prefer env vars or the OS keyring for bot tokens
|
||||
- CI runs secret scanning with `gitleaks`
|
||||
- `doctor` reports token source, not token contents
|
||||
|
||||
@ -706,10 +587,9 @@ go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
|
||||
go test ./... -coverprofile=/tmp/discrawl.cover
|
||||
go tool cover -func=/tmp/discrawl.cover | tail -n 1
|
||||
go build ./cmd/discrawl
|
||||
go run ./cmd/discrawl help | grep tui
|
||||
```
|
||||
|
||||
Target coverage is `>= 85%`.
|
||||
Target coverage is `>= 80%`.
|
||||
|
||||
CI runs:
|
||||
|
||||
|
||||
92
SPEC.md
92
SPEC.md
@ -51,7 +51,7 @@ These are settled unless the user explicitly changes them:
|
||||
- DB location: `~/.discrawl/discrawl.db`
|
||||
- cache dir: `~/.discrawl/cache/`
|
||||
- log dir: `~/.discrawl/logs/`
|
||||
- token source: `DISCORD_BOT_TOKEN` or configured env var, then optional OS keyring fallback
|
||||
- token source: reuse Molty / existing OpenClaw Discord bot config
|
||||
- guild model: one guild in CLI UX, multi-guild-ready schema
|
||||
- search: hybrid, with FTS first and embeddings optional
|
||||
- embedding provider: OpenAI
|
||||
@ -71,12 +71,33 @@ An agent should assume:
|
||||
- Go is installed and modern
|
||||
- user is Peter
|
||||
- user keeps many secrets in `~/.profile`
|
||||
- an existing OpenClaw install may already contain usable Discord bot config
|
||||
|
||||
### Key file paths
|
||||
|
||||
- `~/.discrawl/config.toml`
|
||||
- `~/.discrawl/discrawl.db`
|
||||
- `~/.profile`
|
||||
- `~/.openclaw/openclaw.json`
|
||||
- `~/.openclaw/openclaw.json.bak*`
|
||||
|
||||
### Existing bot config
|
||||
|
||||
The current bot token source is expected in:
|
||||
|
||||
- `~/.openclaw/openclaw.json`
|
||||
|
||||
Expected path inside JSON:
|
||||
|
||||
- `channels.discord.token`
|
||||
|
||||
Expected guild selection path:
|
||||
|
||||
- `channels.discord.guilds`
|
||||
|
||||
The current intended default mode is:
|
||||
|
||||
- `discrawl init --from-openclaw ~/.openclaw/openclaw.json`
|
||||
|
||||
### OpenAI embeddings key
|
||||
|
||||
@ -407,11 +428,12 @@ discrawl [global flags] <command> [args]
|
||||
Purpose:
|
||||
|
||||
- create `~/.discrawl/config.toml`
|
||||
- discover accessible Discord guilds
|
||||
- import defaults from OpenClaw
|
||||
- persist guild id and DB path
|
||||
|
||||
Expected flags:
|
||||
|
||||
- `--from-openclaw <path>`
|
||||
- `--guild <id>`
|
||||
- `--db <path>`
|
||||
- `--with-embeddings`
|
||||
@ -465,14 +487,12 @@ Expected flags:
|
||||
- `--dry-run`
|
||||
- `--watch-every <duration>`
|
||||
- `--max-file-bytes <bytes>`
|
||||
- `--full-cache`
|
||||
|
||||
Requirements:
|
||||
|
||||
- never use Discord user tokens
|
||||
- never extract or persist auth tokens from desktop cache
|
||||
- scan bounded local files only
|
||||
- default to route-bearing HTTP cache entries; exhaustive Chromium cache scans require explicit full-cache mode
|
||||
- store sanitized raw metadata, not full arbitrary cache blobs
|
||||
|
||||
### `search`
|
||||
@ -536,7 +556,7 @@ Must show:
|
||||
Must check:
|
||||
|
||||
- config file readable
|
||||
- Discord token env var readable unless live access is disabled
|
||||
- OpenClaw token source readable
|
||||
- Discord auth valid
|
||||
- guild reachable
|
||||
- DB openable
|
||||
@ -563,10 +583,9 @@ cache_dir = "~/.discrawl/cache"
|
||||
log_dir = "~/.discrawl/logs"
|
||||
|
||||
[discord]
|
||||
token_source = "env"
|
||||
token_env = "DISCORD_BOT_TOKEN"
|
||||
token_keyring_service = "discrawl"
|
||||
token_keyring_account = "discord_bot_token"
|
||||
token_source = "openclaw"
|
||||
openclaw_config = "~/.openclaw/openclaw.json"
|
||||
channel_account = "discord"
|
||||
|
||||
[sync]
|
||||
concurrency = 4
|
||||
@ -593,7 +612,6 @@ Config precedence:
|
||||
Environment variables:
|
||||
|
||||
- `DISCRAWL_CONFIG`
|
||||
- `DISCORD_BOT_TOKEN`
|
||||
- `OPENAI_API_KEY`
|
||||
|
||||
## Token Handling Rules
|
||||
@ -606,8 +624,7 @@ Do not:
|
||||
|
||||
Do:
|
||||
|
||||
- load bot token from env
|
||||
- fall back to the configured OS keyring item when env is empty
|
||||
- load bot token from OpenClaw config path
|
||||
- load OpenAI key from env
|
||||
- redact secrets in debug and doctor output
|
||||
|
||||
@ -791,54 +808,3 @@ For an AI agent to finish the product without external memory, this repo should
|
||||
- milestone order
|
||||
|
||||
This file is the authoritative engineering spec for now.
|
||||
|
||||
## Digest
|
||||
|
||||
`discrawl digest` provides a per-channel activity summary over a lookback window.
|
||||
|
||||
Example usage:
|
||||
|
||||
```bash
|
||||
discrawl digest
|
||||
discrawl digest --since 7d
|
||||
discrawl digest --since 30d --guild 123456789012345678
|
||||
discrawl digest --channel general --top-n 5
|
||||
discrawl --json digest --since 72h
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
- window defaults to `7d` when `--since` is omitted
|
||||
- `--since` accepts Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
|
||||
- `--guild` filters by `guild_id`; empty means no guild filter
|
||||
- `--channel` accepts channel id or exact channel name
|
||||
- per-channel metrics include `messages`, `replies`, and `active_authors`
|
||||
- top posters are ranked by message count using member display fallback order: `display_name -> nick -> global_name -> username -> author_id -> unknown`
|
||||
- top mentions are ranked from `mention_events` and include all target types (`user` and `role`)
|
||||
- channels are sorted by message count descending, then channel name ascending
|
||||
- JSON output returns a `Digest` object with channel rows and totals; plain output emits one tab-separated row per channel
|
||||
|
||||
## Analytics
|
||||
|
||||
`discrawl analytics` is a subcommand group for activity-style queries.
|
||||
|
||||
Example usage:
|
||||
|
||||
```bash
|
||||
discrawl analytics
|
||||
discrawl analytics quiet --since 30d
|
||||
discrawl analytics quiet --guild 123456789012345678
|
||||
discrawl analytics trends --weeks 8
|
||||
discrawl analytics trends --weeks 12 --channel general
|
||||
discrawl --json analytics quiet --since 60d
|
||||
discrawl --json analytics trends --weeks 4
|
||||
```
|
||||
|
||||
Behavior:
|
||||
|
||||
- `analytics quiet` defaults to `30d` lookback and supports `--guild`
|
||||
- `analytics quiet` includes top-level text/announcement channels with no messages at all
|
||||
- quiet rows are sorted with never-active channels first, then by longest silence
|
||||
- `analytics trends` defaults to `8` weeks and supports `--guild` plus `--channel` (id or exact name)
|
||||
- `analytics trends` buckets messages into Monday-start UTC weeks and zero-fills missing weeks for every returned message-capable channel
|
||||
- trends rows are sorted by total messages descending, then channel name ascending
|
||||
|
||||
@ -4,17 +4,12 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/cli"
|
||||
"github.com/steipete/discrawl/internal/cli"
|
||||
)
|
||||
|
||||
func main() {
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
err := cli.Run(ctx, os.Args[1:], os.Stdout, os.Stderr)
|
||||
stop()
|
||||
if err != nil {
|
||||
if err := cli.Run(context.Background(), os.Args[1:], os.Stdout, os.Stderr); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err.Error())
|
||||
os.Exit(cli.ExitCode(err))
|
||||
}
|
||||
|
||||
@ -1,18 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestMainHelpAndVersion(t *testing.T) {
|
||||
@ -35,7 +27,7 @@ func TestMainHelpAndVersion(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("os.Executable: %v", err)
|
||||
}
|
||||
cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainHelpAndVersion")
|
||||
cmd := exec.Command(exe, "-test.run=TestMainHelpAndVersion")
|
||||
cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_ERROR=1")
|
||||
err = cmd.Run()
|
||||
var exitErr *exec.ExitError
|
||||
@ -46,137 +38,3 @@ func TestMainHelpAndVersion(t *testing.T) {
|
||||
}
|
||||
t.Fatalf("expected exit code 2, got %v", err)
|
||||
}
|
||||
|
||||
func TestMainCancelsWatchOnSIGTERM(t *testing.T) {
|
||||
if os.Getenv("DISCRAWL_MAIN_SIGNAL_CHILD") == "1" {
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = filepath.Join(dir, "discrawl.db")
|
||||
cfg.CacheDir = filepath.Join(dir, "cache")
|
||||
cfg.LogDir = filepath.Join(dir, "logs")
|
||||
cfg.Desktop.Path = filepath.Join(dir, "discord")
|
||||
requireNoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
|
||||
requireNoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
oldArgs := os.Args
|
||||
t.Cleanup(func() { os.Args = oldArgs })
|
||||
os.Args = []string{"discrawl", "--config", cfgPath, "wiretap", "--dry-run", "--watch-every", "1s"}
|
||||
go func() {
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
process, err := os.FindProcess(os.Getpid())
|
||||
if err == nil {
|
||||
_ = process.Signal(syscall.SIGTERM)
|
||||
}
|
||||
}()
|
||||
main()
|
||||
return
|
||||
}
|
||||
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
t.Fatalf("os.Executable: %v", err)
|
||||
}
|
||||
cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainCancelsWatchOnSIGTERM")
|
||||
cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_SIGNAL_CHILD=1")
|
||||
output, err := cmd.CombinedOutput()
|
||||
if isContextCanceledExit(err, output) {
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("expected graceful SIGTERM cancellation, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB(t *testing.T) {
|
||||
if dir := os.Getenv("DISCRAWL_MAIN_IMPORT_SIGNAL_DIR"); dir != "" {
|
||||
runWiretapImportSignalChild(t, dir)
|
||||
return
|
||||
}
|
||||
|
||||
dir := t.TempDir()
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
t.Fatalf("os.Executable: %v", err)
|
||||
}
|
||||
cmd := exec.CommandContext(t.Context(), exe, "-test.run=TestMainCancelsWiretapImportOnSIGTERMWithoutCorruptingDB")
|
||||
cmd.Env = append(os.Environ(), "DISCRAWL_MAIN_IMPORT_SIGNAL_DIR="+dir)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if !isContextCanceledExit(err, output) {
|
||||
t.Fatalf("expected context-canceled exit from SIGTERM, got err=%v output=%s", err, output)
|
||||
}
|
||||
|
||||
ctx := t.Context()
|
||||
s, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
if err != nil {
|
||||
t.Fatalf("open db after SIGTERM: %v output=%s", err, output)
|
||||
}
|
||||
defer func() { _ = s.Close() }()
|
||||
_, rows, err := s.ReadOnlyQuery(ctx, "pragma quick_check")
|
||||
if err != nil {
|
||||
t.Fatalf("quick_check after SIGTERM: %v output=%s", err, output)
|
||||
}
|
||||
if len(rows) != 1 || len(rows[0]) != 1 || rows[0][0] != "ok" {
|
||||
t.Fatalf("quick_check after SIGTERM = %#v output=%s", rows, output)
|
||||
}
|
||||
}
|
||||
|
||||
func runWiretapImportSignalChild(t *testing.T, dir string) {
|
||||
t.Helper()
|
||||
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = filepath.Join(dir, "discrawl.db")
|
||||
cfg.CacheDir = filepath.Join(dir, "cache")
|
||||
cfg.LogDir = filepath.Join(dir, "logs")
|
||||
cfg.Desktop.Path = filepath.Join(dir, "discord")
|
||||
cfg.Discord.TokenSource = "none"
|
||||
cfg.Share.AutoUpdate = false
|
||||
cachePath := filepath.Join(cfg.Desktop.Path, "Local Storage", "leveldb")
|
||||
requireNoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
requireNoError(t, config.Write(cfgPath, cfg))
|
||||
writeLargeWiretapCache(t, filepath.Join(cachePath, "000001.log"), 50000)
|
||||
|
||||
oldArgs := os.Args
|
||||
t.Cleanup(func() { os.Args = oldArgs })
|
||||
os.Args = []string{"discrawl", "--config", cfgPath, "wiretap", "--path", cfg.Desktop.Path}
|
||||
go func() {
|
||||
time.Sleep(15 * time.Millisecond)
|
||||
process, err := os.FindProcess(os.Getpid())
|
||||
if err == nil {
|
||||
_ = process.Signal(syscall.SIGTERM)
|
||||
}
|
||||
}()
|
||||
main()
|
||||
}
|
||||
|
||||
func writeLargeWiretapCache(t *testing.T, path string, count int) {
|
||||
t.Helper()
|
||||
|
||||
file, err := os.Create(path)
|
||||
requireNoError(t, err)
|
||||
defer func() { requireNoError(t, file.Close()) }()
|
||||
_, err = fmt.Fprintln(file, `{"id":"111111111111111117","guild_id":"999999999999999997","type":0,"name":"sigterm-import"}`)
|
||||
requireNoError(t, err)
|
||||
for i := range count {
|
||||
_, err = fmt.Fprintf(
|
||||
file,
|
||||
`{"id":"3333333333%09d","channel_id":"111111111111111117","content":"sigterm import message %d","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222228","username":"alice"}}`+"\n",
|
||||
i,
|
||||
i,
|
||||
)
|
||||
requireNoError(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
// isContextCanceledExit reports whether err is an *exec.ExitError with exit
// code 1 and output contains the text "context canceled".
func isContextCanceledExit(err error, output []byte) bool {
	var exitErr *exec.ExitError
	if !errors.As(err, &exitErr) {
		return false
	}
	if exitErr.ExitCode() != 1 {
		return false
	}
	return bytes.Contains(output, []byte("context canceled"))
}
|
||||
|
||||
// requireNoError fails the test immediately via t.Fatal when err is non-nil.
// It is marked as a helper so failures are attributed to the caller's line.
func requireNoError(t *testing.T, err error) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatal(err)
}
|
||||
|
||||
@ -1 +0,0 @@
|
||||
discrawl.sh
|
||||
@ -1,46 +0,0 @@
|
||||
# Discrawl
|
||||
|
||||
Mirror Discord guilds into local SQLite. Search server history without depending on Discord search. Bring a bot token, or read everything offline from a Git snapshot.
|
||||
|
||||
## What it does
|
||||
|
||||
- discovers every guild a bot can access and syncs channels, threads, members, and message history into SQLite
|
||||
- maintains FTS5 indexes for fast literal search; optional embeddings for semantic and hybrid recall
|
||||
- imports classifiable Discord Desktop cache messages with `wiretap`, including proven DMs under `@me`
|
||||
- tails the Gateway for live updates with periodic repair sweeps
|
||||
- publishes the archive as sharded NDJSON snapshots in a private Git repo so readers can search offline with no Discord credentials
|
||||
- exposes read-only SQL, channel/member directories, mention queries, digests, and trend analytics
|
||||
|
||||
## Pick your path
|
||||
|
||||
- **New here?** Read [Install](install.html) and run `discrawl init`.
|
||||
- **Already have a bot?** Jump to [`sync`](commands/sync.html) and [`search`](commands/search.html).
|
||||
- **Just want to read a shared archive?** Use [`subscribe`](commands/subscribe.html) - no token needed.
|
||||
- **Need DM search?** [`wiretap`](commands/wiretap.html) imports local Discord Desktop cache.
|
||||
- **Want semantic search?** Configure [Embeddings](guides/embeddings.html), then run [`embed`](commands/embed.html).
|
||||
- **Wiring an agent or launcher?** `discrawl metadata --json`, `discrawl status --json`, and `discrawl doctor --json` expose the read-only crawlkit control surface.
|
||||
|
||||
## At a glance
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="..."
|
||||
discrawl init
|
||||
discrawl doctor
|
||||
discrawl sync --full
|
||||
discrawl search "panic: nil pointer"
|
||||
discrawl tail
|
||||
```
|
||||
|
||||
[`discrawl tui`](commands/tui.html) uses the shared crawlkit terminal explorer:
|
||||
channel/person/thread groups on the left, message rows in the middle, and
|
||||
readable message/thread detail on the right.
|
||||
|
||||
## Sections
|
||||
|
||||
- **[Start](install.html)** - install, configure, set up the Discord bot, security notes, contact
|
||||
- **[Guides](guides/)** - sync sources, wiretap internals, search modes, embeddings, Git snapshots, data layout
|
||||
- **[Commands](commands/)** - one page per CLI command
|
||||
|
||||
## Where to file issues
|
||||
|
||||
`https://github.com/openclaw/discrawl/issues`. See [contact](contact.html) for project links.
|
||||
@ -7,7 +7,7 @@ summary: "Release checklist for discrawl (GitHub release binaries via GoReleaser
|
||||
Always do all steps below. No partial releases.
|
||||
|
||||
Assumptions:
|
||||
- Repo: `openclaw/discrawl`
|
||||
- Repo: `steipete/discrawl`
|
||||
- Binary: `discrawl`
|
||||
- GoReleaser config: `.goreleaser.yaml`
|
||||
- Homebrew tap repo: `~/Projects/homebrew-tap`
|
||||
@ -22,15 +22,14 @@ Assumptions:
|
||||
## 1) Verify build + tests
|
||||
|
||||
```sh
|
||||
go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.11.1 run
|
||||
go test -count=1 ./... -coverprofile=coverage.out
|
||||
go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.5.0 run
|
||||
go test ./... -coverprofile=coverage.out
|
||||
go tool cover -func=coverage.out | tail -n 1
|
||||
go test -count=1 -race ./...
|
||||
go build -o /tmp/discrawl ./cmd/discrawl
|
||||
gh run list -L 5 --branch main
|
||||
```
|
||||
|
||||
Coverage floor: `85%+`
|
||||
Coverage floor: `80%+`
|
||||
|
||||
## 2) Update changelog
|
||||
|
||||
@ -82,7 +81,7 @@ After tagging a real release:
|
||||
Useful commands:
|
||||
|
||||
```sh
|
||||
curl -L -o /tmp/discrawl-darwin-arm64.tgz https://github.com/openclaw/discrawl/releases/download/vX.Y.Z/discrawl_X.Y.Z_darwin_arm64.tar.gz
|
||||
curl -L -o /tmp/discrawl-darwin-arm64.tgz https://github.com/steipete/discrawl/releases/download/vX.Y.Z/discrawl_X.Y.Z_darwin_arm64.tar.gz
|
||||
shasum -a 256 /tmp/discrawl-darwin-arm64.tgz
|
||||
brew uninstall discrawl || true
|
||||
brew install steipete/tap/discrawl
|
||||
@ -92,7 +91,7 @@ brew info steipete/tap/discrawl
|
||||
|
||||
## Notes
|
||||
|
||||
- Build-time version stamping comes from `-X github.com/openclaw/discrawl/internal/cli.version={{ .Version }}`
|
||||
- Build-time version stamping comes from `-X github.com/steipete/discrawl/internal/cli.version={{ .Version }}`
|
||||
- If release workflow needs a rerun:
|
||||
|
||||
```sh
|
||||
|
||||
@ -1,63 +0,0 @@
|
||||
# Discord bot setup
|
||||
|
||||
Discrawl needs a real Discord bot token (not a user token) to run `sync` or `tail`. The desktop `wiretap` import does not need any token.
|
||||
|
||||
## Minimum setup
|
||||
|
||||
1. Create or reuse a Discord application in the [Discord developer portal](https://discord.com/developers/applications).
|
||||
2. Add a bot user to that application.
|
||||
3. Invite the bot to the target guilds.
|
||||
4. Enable these intents for the bot:
|
||||
- **Server Members Intent**
|
||||
- **Message Content Intent**
|
||||
5. Ensure the bot can at least:
|
||||
- view channels
|
||||
- read message history
|
||||
|
||||
Without those intents/permissions, `sync`, `tail`, member snapshots, and message content archiving will be partial or fail outright.
|
||||
|
||||
## Provide the token
|
||||
|
||||
### Environment variable
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="your-bot-token"
|
||||
discrawl doctor
|
||||
```
|
||||
|
||||
If you keep shell secrets in `~/.profile`, add the export there and reload your shell.
|
||||
|
||||
### OS keyring
|
||||
|
||||
If you prefer the OS keyring, keep the token out of config and store it in the default keyring item:
|
||||
|
||||
```bash
|
||||
# macOS Keychain
|
||||
security add-generic-password -U -s discrawl -a discord_bot_token -w "$DISCORD_BOT_TOKEN"
|
||||
|
||||
# Linux Secret Service / libsecret
|
||||
printf %s "$DISCORD_BOT_TOKEN" | secret-tool store --label="discrawl Discord bot token" service discrawl username discord_bot_token
|
||||
|
||||
# Windows Credential Manager
|
||||
cmdkey /generic:discrawl:discord_bot_token /user:discord_bot_token /pass:%DISCORD_BOT_TOKEN%
|
||||
```
|
||||
|
||||
Set `discord.token_source = "keyring"` if you want to require the keyring and skip env entirely.
|
||||
|
||||
## Verify
|
||||
|
||||
```bash
|
||||
discrawl doctor
|
||||
```
|
||||
|
||||
`doctor` reports the token source (env or keyring), confirms bot auth, lists how many guilds the bot can access, and verifies the local DB plus FTS wiring. It does not print the token contents.
|
||||
|
||||
## Wiretap-only setup
|
||||
|
||||
If you only want to import local Discord Desktop cache messages and not run a bot, skip everything above and run:
|
||||
|
||||
```bash
|
||||
discrawl sync --source wiretap
|
||||
```
|
||||
|
||||
Or `discrawl wiretap` directly. See the [wiretap guide](guides/wiretap.html).
|
||||
@ -1,37 +0,0 @@
|
||||
# `analytics`
|
||||
|
||||
Groups activity-style queries under one namespace.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl analytics
|
||||
discrawl analytics quiet --since 30d
|
||||
discrawl analytics quiet --guild 123456789012345678
|
||||
discrawl analytics trends --weeks 8
|
||||
discrawl analytics trends --weeks 12 --channel general
|
||||
discrawl --json analytics quiet --since 60d
|
||||
discrawl --json analytics trends --weeks 4
|
||||
```
|
||||
|
||||
## Subcommands
|
||||
|
||||
### `quiet`
|
||||
|
||||
Top-level text/announcement channels with no messages in the lookback window, including never-active channels.
|
||||
|
||||
- `--since <duration>` - lookback window (e.g. `30d`, `60d`)
|
||||
- `--guild <id>` - scope to one guild; when omitted, `default_guild_id` is used if configured
|
||||
|
||||
### `trends`
|
||||
|
||||
Monday-start UTC weekly message counts per message-capable channel.
|
||||
|
||||
- `--weeks <n>` - number of weeks to include
|
||||
- `--channel <id|name>` - scope to one channel
|
||||
- `--guild <id>` - scope to one guild
|
||||
|
||||
## See also
|
||||
|
||||
- [`digest`](digest.html)
|
||||
- [`status`](status.html)
|
||||
@ -1,25 +0,0 @@
|
||||
# `channels`
|
||||
|
||||
Browse the offline channel directory.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl channels list
|
||||
discrawl channels show 123456789012345678
|
||||
```
|
||||
|
||||
## Subcommands
|
||||
|
||||
- `list` - dump every channel and thread in the local archive
|
||||
- `show <id>` - show metadata for one channel/thread
|
||||
|
||||
## Notes
|
||||
|
||||
- threads are stored as channels because that matches the Discord model
|
||||
- archived threads are part of the sync surface and appear here too
|
||||
|
||||
## See also
|
||||
|
||||
- [`members`](members.html)
|
||||
- [Data layout](../guides/data-storage.html)
|
||||
@ -1,29 +0,0 @@
|
||||
# `digest`
|
||||
|
||||
Summarizes per-channel activity for a lookback window.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl digest
|
||||
discrawl digest --since 30d
|
||||
discrawl digest --guild 123456789012345678
|
||||
discrawl digest --channel general
|
||||
discrawl --json digest --since 7d --top-n 5
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--since <duration>` - Go durations (`72h`, `30m`) and `Nd` shorthand (`7d`, `30d`)
|
||||
- `--guild <id>` - scope to one guild; when omitted, `default_guild_id` is used if configured
|
||||
- `--channel <id|name>` - scope to one channel
|
||||
- `--top-n <n>` - how many top posters and mention targets per channel
|
||||
|
||||
## Output
|
||||
|
||||
For each channel in scope: message count, top posters, top mention targets, first/last activity in window.
|
||||
|
||||
## See also
|
||||
|
||||
- [`analytics`](analytics.html)
|
||||
- [`mentions`](mentions.html)
|
||||
@ -1,39 +0,0 @@
|
||||
# `dms`
|
||||
|
||||
Lists local wiretap DM conversations or reads one DM thread. This is a convenience layer over the synthetic `@me` guild id.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl dms
|
||||
discrawl dms --with Molty --last 20
|
||||
discrawl dms --with 1456464433768300635 --all
|
||||
discrawl dms --search "launch checklist"
|
||||
discrawl dms --with Molty --search "invoice"
|
||||
```
|
||||
|
||||
## Default output
|
||||
|
||||
`discrawl dms` (no flags) shows one row per local DM channel with:
|
||||
|
||||
- message count
|
||||
- author count
|
||||
- first/last cached message times
|
||||
|
||||
## Flags
|
||||
|
||||
- `--with <name|id>` - switches to message output for that DM conversation (unless `--list` is also set)
|
||||
- `--list` - keep the channel-summary listing even when `--with` is set
|
||||
- `--search <query>` - search only local DM messages
|
||||
- `--last <n>` / `--all` / `--limit <n>` - same slicing as [`messages`](messages.html)
|
||||
|
||||
## Notes
|
||||
|
||||
- only sees data imported by [`wiretap`](wiretap.html) - Discord Desktop cache, not live DM history
|
||||
- skips Git snapshot auto-update because DMs are never imported from the shared mirror
|
||||
- DMs are local-only and never published
|
||||
|
||||
## See also
|
||||
|
||||
- [Wiretap guide](../guides/wiretap.html)
|
||||
- [`messages --dm`](messages.html)
|
||||
@ -1,35 +0,0 @@
|
||||
# `doctor`
|
||||
|
||||
Checks config, auth, DB, and FTS wiring. The fastest sanity check.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl doctor
|
||||
```
|
||||
|
||||
## What it verifies
|
||||
|
||||
- config loads from the expected path
|
||||
- where the bot token was resolved from (env var or OS keyring)
|
||||
- bot auth succeeds against Discord
|
||||
- how many guilds the bot can access
|
||||
- local SQLite database exists and the schema version matches the binary
|
||||
- FTS5 index is wired up
|
||||
|
||||
## What it does not do
|
||||
|
||||
- does not print the token contents
|
||||
- does not run a sync; it only checks readiness
|
||||
|
||||
## Common outputs
|
||||
|
||||
- "token from env (DISCORD_BOT_TOKEN)" or "token from keyring (discrawl/discord_bot_token)"
|
||||
- "0 guilds visible" - bot is not invited to any guild yet, or intents/permissions are missing
|
||||
- "schema newer than binary" - update `discrawl` to a build that supports the local DB schema
|
||||
|
||||
## See also
|
||||
|
||||
- [Bot setup](../bot-setup.html)
|
||||
- [Configuration](../configuration.html)
|
||||
- [`status`](status.html)
|
||||
@ -1,42 +0,0 @@
|
||||
# `embed`
|
||||
|
||||
Drains pending `embedding_jobs` rows by calling the configured embedding provider and writing vectors to `message_embeddings`.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl embed
|
||||
discrawl embed --limit 1000
|
||||
discrawl embed --rebuild --limit 1000
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--limit <n>` - cap how many jobs this run drains
|
||||
- `--batch-size <n>` - provider request batch size
|
||||
- `--rebuild` - regenerate vectors for the existing archive after a provider/model/input-version change
|
||||
|
||||
## Behavior
|
||||
|
||||
- claims jobs with a short lock so overlapping runs do not process the same batch
|
||||
- rate limits requeue the batch and stop that drain run cleanly
|
||||
- provider or validation failures are retried, up to three attempts, before the job is marked failed
|
||||
- messages with no normalized text are marked done and any stale vector for that message is removed
|
||||
|
||||
## Identity
|
||||
|
||||
Provider, model, and input version are stored on each job and vector. Changing any of them retargets pending jobs to the new identity and resets prior attempts. Existing vectors for another identity remain in SQLite but are not used by semantic search.
|
||||
|
||||
## When to use `--rebuild`
|
||||
|
||||
Use `--rebuild` after changing the `[search.embeddings]` provider, model, or any input setting, when you want to regenerate vectors for messages already in the archive.
|
||||
|
||||
## Pairing with `sync`
|
||||
|
||||
`sync --with-embeddings` enqueues; `embed` drains. The two phases are intentionally separate so a slow provider does not block the hot sync path.
|
||||
|
||||
## See also
|
||||
|
||||
- [Embeddings guide](../guides/embeddings.html)
|
||||
- [Search modes](../guides/search-modes.html)
|
||||
- [`search`](search.html)
|
||||
@ -1,31 +0,0 @@
|
||||
# `init`
|
||||
|
||||
Creates the local config and discovers accessible guilds.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl init
|
||||
discrawl init --guild 123456789012345678
|
||||
discrawl init --db ~/data/discrawl.db
|
||||
discrawl init --with-embeddings
|
||||
```
|
||||
|
||||
## What it does
|
||||
|
||||
- writes `~/.discrawl/config.toml` (or whatever `--config` / `DISCRAWL_CONFIG` points to)
|
||||
- discovers guilds the configured bot can access
|
||||
- if exactly one guild is available, sets it as `default_guild_id` automatically
|
||||
- creates the SQLite database at `db_path`
|
||||
|
||||
## Flags
|
||||
|
||||
- `--guild <id>` - set a specific default guild instead of auto-picking
|
||||
- `--db <path>` - override `db_path`
|
||||
- `--with-embeddings` - enable `[search.embeddings]` in the generated config
|
||||
|
||||
## See also
|
||||
|
||||
- [Configuration](../configuration.html)
|
||||
- [Bot setup](../bot-setup.html)
|
||||
- [`doctor`](doctor.html)
|
||||
@ -1,72 +0,0 @@
|
||||
# `members`
|
||||
|
||||
Browse the offline member directory built from archived profile payloads.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl members list
|
||||
discrawl members show 123456789012345678
|
||||
discrawl members show --messages 10 steipete
|
||||
discrawl members search "peter"
|
||||
discrawl members search "github"
|
||||
discrawl members search "https://github.com/steipete"
|
||||
```
|
||||
|
||||
## Subcommands
|
||||
|
||||
- `list` - dump the local member directory
|
||||
- `show <id|query>` - show one member; if the query resolves to one match, also show recent messages
|
||||
- `search <query>` - match names plus any offline profile fields present in the archived member payload
|
||||
|
||||
## Flags
|
||||
|
||||
- `show --messages <n>` - include the most recent `n` messages from that member
|
||||
|
||||
## Profile fields
|
||||
|
||||
Extracted from the archived Discord member/user payload. May include:
|
||||
|
||||
- `bio`
|
||||
- `pronouns`
|
||||
- `location`
|
||||
- `website`
|
||||
- `x`
|
||||
- `github`
|
||||
- discovered URLs
|
||||
|
||||
If the bot cannot see a field from Discord, `discrawl` cannot invent it. This is strictly archive-based offline data.
|
||||
|
||||
## Typical workflow
|
||||
|
||||
```bash
|
||||
discrawl sync --full
|
||||
discrawl members search "design engineer"
|
||||
discrawl members search "github"
|
||||
discrawl members show --messages 25 steipete
|
||||
discrawl messages --author steipete --days 30 --all
|
||||
```
|
||||
|
||||
## Typical `members show` output
|
||||
|
||||
```text
|
||||
guild=1456350064065904867
|
||||
user=37658261826043904
|
||||
username=steipete
|
||||
display=Peter Steinberger
|
||||
joined=2026-03-08T16:03:14Z
|
||||
bot=false
|
||||
x=steipete
|
||||
github=steipete
|
||||
website=https://steipete.me
|
||||
bio=Builds native apps and tooling.
|
||||
urls=https://steipete.me, https://github.com/steipete
|
||||
message_count=1284
|
||||
first_message=2026-02-01T09:00:00Z
|
||||
last_message=2026-03-08T15:59:58Z
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [`channels`](channels.html)
|
||||
- [Data layout](../guides/data-storage.html)
|
||||
@ -1,27 +0,0 @@
|
||||
# `mentions`
|
||||
|
||||
Lists structured user and role mentions extracted during sync.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl mentions --channel maintainers --days 7
|
||||
discrawl mentions --target steipete --type user --limit 50
|
||||
discrawl mentions --target 1456406468898197625
|
||||
discrawl --json mentions --type role --days 1
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--target <id|name>` - user or role id, exact name, or partial match
|
||||
- `--type <user|role>` - filter by mention type
|
||||
- `--channel <id|name>` - same channel matching as [`messages`](messages.html)
|
||||
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
|
||||
- `--days <n>` / `--since <RFC3339>` / `--before <RFC3339>` - time filters
|
||||
- `--limit <n>` - cap result count
|
||||
|
||||
## Notes
|
||||
|
||||
- mentions are recorded structurally during sync, so this is a direct row read - no FTS parsing
|
||||
- combine with `--type role` to find every mention of a role
|
||||
- combine with `--target steipete` to find everywhere your account got pinged
|
||||
@ -1,41 +0,0 @@
|
||||
# `messages`
|
||||
|
||||
Lists exact message slices by channel, author, and time range. Unlike [`search`](search.html), this does not query the FTS index - it pulls a slice of rows.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl messages --channel maintainers --days 7 --all
|
||||
discrawl messages --channel maintainers --hours 6 --all
|
||||
discrawl messages --channel "#maintainers" --since 2026-03-01T00:00:00Z
|
||||
discrawl messages --channel 1456744319972282449 --author steipete --limit 50
|
||||
discrawl messages --channel maintainers --last 100 --sync
|
||||
discrawl messages --dm --channel Molty --last 20
|
||||
discrawl messages --channel maintainers --days 7 --all --include-empty
|
||||
discrawl --json messages --channel maintainers --days 3
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--channel <id|name|#name>` - id, exact name, `#name`, or partial name match
|
||||
- `--guild <id>` / `--guilds <id,id>` / `--dm` - restrict the guild scope (`--dm` is shorthand for `--guild @me`)
|
||||
- `--author <name>` - restrict to one author
|
||||
- `--hours <n>` - shorthand for "since now minus N hours"
|
||||
- `--days <n>` - shorthand for "since now minus N days"
|
||||
- `--since <RFC3339>` - explicit start timestamp
|
||||
- `--last <n>` - return the newest `N` matching messages, then print oldest-to-newest
|
||||
- `--limit <n>` - safety limit (default 200; `--all` removes it)
|
||||
- `--all` - removes the safety limit
|
||||
- `--sync` - blocking pre-query sync for the matching channel or guild scope
|
||||
- `--include-empty` - include rows with no displayable/searchable content
|
||||
|
||||
## Notes
|
||||
|
||||
- at least one filter is required
|
||||
- `--dm` skips Git snapshot auto-update because DMs are never imported from the shared mirror
|
||||
- use either `--last` for the newest matching rows or `--all` for an uncapped oldest-to-newest slice
|
||||
|
||||
## See also
|
||||
|
||||
- [`search`](search.html)
|
||||
- [`dms`](dms.html)
|
||||
@ -1,42 +0,0 @@
|
||||
# `publish`
|
||||
|
||||
Publishes the local SQLite archive as sharded, compressed NDJSON snapshots in a private Git repo.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl publish --remote https://github.com/example/discord-archive.git --push
|
||||
discrawl publish --readme path/to/discord-backup/README.md --push
|
||||
discrawl publish --message "sync: discord archive" --push
|
||||
discrawl publish --with-embeddings --push
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--repo <path>` - local snapshot repo path (defaults to `[share].repo_path`)
|
||||
- `--remote <url>` - target Git remote (defaults to `[share].remote`)
|
||||
- `--branch <name>` - snapshot branch (defaults to `[share].branch`)
|
||||
- `--message <text>` - commit message (default: `sync: discord archive`)
|
||||
- `--no-commit` - write/export files without creating a Git commit
|
||||
- `--push` - push the snapshot commit after writing it
|
||||
- `--readme <path>` - update the activity block in this README file too
|
||||
- `--with-embeddings` - also export stored `message_embeddings` rows
|
||||
|
||||
## What is published
|
||||
|
||||
- non-DM archive tables (DM `@me` rows are always excluded)
|
||||
- README activity block (latest update, latest message, totals, day/week/month activity)
|
||||
- with `--with-embeddings`: vectors for the configured `[search.embeddings]` provider/model/input version, plus identity manifest
|
||||
|
||||
## What is not published
|
||||
|
||||
- `@me` DM guilds, channels, messages, events, attachments, mentions, wiretap sync state
|
||||
- `embedding_jobs`
|
||||
- raw bot tokens or any local secret
|
||||
|
||||
## See also
|
||||
|
||||
- [Git snapshots guide](../guides/git-snapshots.html)
|
||||
- [`subscribe`](subscribe.html)
|
||||
- [`update`](update.html)
|
||||
- [`report`](report.html)
|
||||
@ -1,35 +0,0 @@
|
||||
# `report`
|
||||
|
||||
Generates the Markdown activity block used by the shared backup repo README.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl report
|
||||
discrawl report --readme path/to/discord-backup/README.md
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--readme <path>` - update the activity block in the given README file in place
|
||||
|
||||
## What gets rendered
|
||||
|
||||
Deterministic README stats:
|
||||
|
||||
- latest update time
|
||||
- latest archived message
|
||||
- archive totals
|
||||
- day / week / month activity
|
||||
|
||||
Every scheduled snapshot publish updates this block.
|
||||
|
||||
## CI integration
|
||||
|
||||
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
|
||||
|
||||
## See also
|
||||
|
||||
- [`publish`](publish.html)
|
||||
- [Git snapshots](../guides/git-snapshots.html)
|
||||
- [`status`](status.html)
|
||||
@ -1,51 +0,0 @@
|
||||
# `search`
|
||||
|
||||
Searches archived messages. FTS is the default mode and works without embeddings.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl search "panic: nil pointer"
|
||||
discrawl search --mode fts "panic: nil pointer"
|
||||
discrawl search --mode semantic "missing launch checklist"
|
||||
discrawl search --mode hybrid "database timeout"
|
||||
discrawl search --guild 123456789012345678 "payment failed"
|
||||
discrawl search --dm "launch checklist"
|
||||
discrawl search --channel billing --author steipete --limit 50 "invoice"
|
||||
discrawl search --include-empty "GitHub"
|
||||
discrawl --json search "websocket closed"
|
||||
```
|
||||
|
||||
## Modes
|
||||
|
||||
- `fts` (default) - SQLite FTS5 with `unicode61` tokenizer; newest matches first
|
||||
- `semantic` - embeds the query, scores against locally stored vectors; errors out if embeddings are disabled or no compatible vectors exist
|
||||
- `hybrid` - runs both, deduplicates by message id, falls back to FTS when semantic is unavailable
|
||||
|
||||
## Flags
|
||||
|
||||
- `--mode <fts|semantic|hybrid>` - search mode
|
||||
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
|
||||
- `--dm` - shorthand for `--guild @me`
|
||||
- `--channel <id|name|#name>` - restrict to one channel (id, exact name, `#name`, or partial match)
|
||||
- `--author <name>` - restrict to one author
|
||||
- `--limit <n>` - cap result count
|
||||
- `--include-empty` - include rows with no searchable content (attachment text/filenames, embeds, and replies still count as content)
|
||||
|
||||
## FTS behavior
|
||||
|
||||
User query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators. Punctuation still follows FTS5 tokenization rules.
|
||||
|
||||
## Semantic prerequisites
|
||||
|
||||
- `[search.embeddings]` configured in `~/.discrawl/config.toml`
|
||||
- local `message_embeddings` rows for the configured provider, model, and input version
|
||||
- input version is currently `message_normalized_v1`
|
||||
|
||||
Run `discrawl sync --with-embeddings` to enqueue, then `discrawl embed` to generate vectors.
|
||||
|
||||
## See also
|
||||
|
||||
- [Search modes](../guides/search-modes.html)
|
||||
- [Embeddings](../guides/embeddings.html)
|
||||
- [`messages`](messages.html) - exact slices, not search
|
||||
@ -1,25 +0,0 @@
|
||||
# `sql`
|
||||
|
||||
Runs read-only SQL against the local database.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl sql 'select count(*) as messages from messages'
|
||||
echo 'select guild_id, count(*) from messages group by guild_id' | discrawl sql -
|
||||
```
|
||||
|
||||
`-` reads SQL from stdin.
|
||||
|
||||
## Notes
|
||||
|
||||
- read-only - writes are blocked at the connection level
|
||||
- `--unsafe --confirm` opens the escape hatch for deliberate write/admin SQL
|
||||
- the schema is multi-guild ready; threads are stored as channels because that matches the Discord model
|
||||
- proven DMs use the synthetic guild id `@me`
|
||||
- SQLite schema migrations are versioned with `PRAGMA user_version`; startup fails fast when the local DB schema is newer than the binary supports
|
||||
|
||||
## See also
|
||||
|
||||
- [Data layout](../guides/data-storage.html) - what tables exist
|
||||
- [`status`](status.html) - high-level archive numbers without raw SQL
|
||||
@ -1,24 +0,0 @@
|
||||
# `status`
|
||||
|
||||
Shows local archive status.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl status
|
||||
```
|
||||
|
||||
## Reports
|
||||
|
||||
- where the local database lives
|
||||
- guild count and per-guild totals
|
||||
- channel and thread counts
|
||||
- message totals
|
||||
- latest archived message time
|
||||
- whether the Git share is configured and how stale the local import is
|
||||
- embeddings status if `[search.embeddings]` is enabled
|
||||
|
||||
## See also
|
||||
|
||||
- [`doctor`](doctor.html) - liveness check (config, auth, DB, FTS wiring)
|
||||
- [`report`](report.html) - Markdown activity block for the shared backup README
|
||||
@ -1,48 +0,0 @@
|
||||
# `subscribe`
|
||||
|
||||
Subscribes to a Git-backed snapshot repo. This is the Git-only setup path: no Discord bot token is required.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl subscribe https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --repo ~/.discrawl/share https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --branch main https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --no-import https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
|
||||
```
|
||||
|
||||
## What it does
|
||||
|
||||
- writes a config with `discord.token_source = "none"` (so no bot token is required)
|
||||
- imports the latest snapshot into the local SQLite archive
|
||||
- enables auto-refresh: read commands fetch and import when the local share import is older than `share.stale_after` (default `15m`)
|
||||
|
||||
## Flags
|
||||
|
||||
- `--repo <path>` - local snapshot repo path
|
||||
- `--branch <name>` - snapshot branch (default: `main`)
|
||||
- `--stale-after <duration>` - how stale the local import can get before read commands auto-refresh
|
||||
- `--no-auto-update` - disable auto-refresh (use [`update`](update.html) manually)
|
||||
- `--no-import` - write config only; skip the initial pull/import
|
||||
- `--with-embeddings` - import vectors that match your local `[search.embeddings]` identity
|
||||
|
||||
## Disabled in this mode
|
||||
|
||||
`sync` and `tail` are disabled when `discord.token_source = "none"` because they need live Discord access. Switch to a token-equipped config to re-enable them.
|
||||
|
||||
## After subscribing
|
||||
|
||||
```bash
|
||||
discrawl search "launch checklist"
|
||||
discrawl messages --channel general --hours 24
|
||||
discrawl status
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [Git snapshots guide](../guides/git-snapshots.html)
|
||||
- [`publish`](publish.html)
|
||||
- [`update`](update.html)
|
||||
@ -1,82 +0,0 @@
|
||||
# `sync`
|
||||
|
||||
Refreshes SQLite from one or both archive sources.
|
||||
|
||||
By default, `sync` runs both live/local sources and does **not** import the Git snapshot first:
|
||||
|
||||
- Discord bot-token sync for bot-visible guild data
|
||||
- local Discord Desktop cache import for classifiable cached messages and proven DMs
|
||||
|
||||
Use [`update`](update.html) when you want to pull/import the shared Git snapshot. Snapshot imports normally use changed-shard deltas, but unsafe table changes fall back to a full import. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` (only when stale) or `--update=force` (always). `--no-update` is accepted as an explicit no-op alias for the default.
|
||||
|
||||
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl sync
|
||||
discrawl sync --update=auto
|
||||
discrawl sync --update=force
|
||||
discrawl sync --no-update
|
||||
discrawl sync --full
|
||||
discrawl sync --full --all
|
||||
discrawl sync --guild 123456789012345678
|
||||
discrawl sync --guilds 123,456 --concurrency 8
|
||||
discrawl sync --source both # default: bot API + desktop cache
|
||||
discrawl sync --source discord # bot API only; aliases: key, bot, api
|
||||
discrawl sync --source wiretap # desktop cache only; aliases: desktop, cache
|
||||
discrawl sync --guild 123456789012345678 --all-channels
|
||||
discrawl sync --channels 111,222 --since 2026-03-01T00:00:00Z
|
||||
discrawl sync --with-embeddings
|
||||
```
|
||||
|
||||
## Sources
|
||||
|
||||
| Source | Reads from | Stores |
|
||||
| --- | --- | --- |
|
||||
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
|
||||
| `discord` / `key` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
|
||||
| `wiretap` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
|
||||
|
||||
## Bot sync modes
|
||||
|
||||
| Command | Use when | Behavior |
|
||||
| --- | --- | --- |
|
||||
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored cursor |
|
||||
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, then runs the routine live refresh |
|
||||
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
|
||||
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete |
|
||||
|
||||
## Flags
|
||||
|
||||
- `--source <both|discord|wiretap>` - which archive sources to read
|
||||
- `--update <auto|force|none>` - whether to import the Git snapshot before live deltas
|
||||
- `--full` - historical backfill (slow on large guilds)
|
||||
- `--all-channels` - broader incremental sweep across every stored channel/thread
|
||||
- `--latest-only` - explicit latest-only run (also the default for untargeted `sync`)
|
||||
- `--all` - ignore `default_guild_id` and fan out across every discovered guild
|
||||
- `--guild <id>` / `--guilds <id,id>` - target specific guilds
|
||||
- `--channels <id,id>` - target specific channels (forum ids expand to threads)
|
||||
- `--since <RFC3339>` - limit initial history and `--full` backfill to messages at or after this timestamp
|
||||
- `--concurrency <n>` - override worker count (default auto-sized: floor 8, cap 32)
|
||||
- `--skip-members` - refresh guild/channel/message data without crawling members
|
||||
- `--with-embeddings` - also enqueue changed messages into `embedding_jobs`
|
||||
|
||||
## Notes
|
||||
|
||||
- `--latest-only` is the default for untargeted `sync`. Use `--all-channels` to opt out without doing a full historical crawl.
|
||||
- `--since` does not mark older history as complete, so a later `sync --full` without `--since` can continue the backfill.
|
||||
- Long runs emit periodic progress logs to stderr.
|
||||
- Heartbeat logs (`message sync waiting`) name the oldest active channel and per-channel page activity if in-flight channels stop completing for a while.
|
||||
- Every run ends with a `message sync finished` summary.
|
||||
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried next sync.
|
||||
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
|
||||
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
|
||||
- Full sync member refresh is best-effort and gives up after five minutes when the caller supplies no deadline.
|
||||
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads.
|
||||
|
||||
## See also
|
||||
|
||||
- [Sync sources](../guides/sync-sources.html)
|
||||
- [`tail`](tail.html)
|
||||
- [`update`](update.html)
|
||||
@ -1,33 +0,0 @@
|
||||
# `tail`
|
||||
|
||||
Runs the live Discord Gateway tail and a periodic repair loop.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl tail
|
||||
discrawl tail --guild 123456789012345678
|
||||
discrawl tail --repair-every 30m
|
||||
```
|
||||
|
||||
## What it does
|
||||
|
||||
- connects to the Discord Gateway with the configured bot token
|
||||
- writes new messages, edits, and deletes into the local archive as they arrive
|
||||
- periodically runs a repair pass to catch anything the live stream missed
|
||||
|
||||
## Flags
|
||||
|
||||
- `--guild <id>` / `--guilds <id,id>` - tail a specific guild scope (default: `default_guild_id`, or all discovered guilds if unset)
|
||||
- `--repair-every <duration>` - frequency of the repair sweep
|
||||
|
||||
## Notes
|
||||
|
||||
- requires a working Discord bot token
|
||||
- not available in Git-only mode (`discord.token_source = "none"`)
|
||||
- terminates cleanly on SIGINT / SIGTERM and treats cancellation as normal exit
|
||||
|
||||
## See also
|
||||
|
||||
- [`sync`](sync.html)
|
||||
- [Bot setup](../bot-setup.html)
|
||||
@ -1,47 +0,0 @@
|
||||
# `tui`
|
||||
|
||||
Opens the local terminal archive browser for stored messages.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl tui
|
||||
discrawl tui --guild 123456789012345678 --channel general
|
||||
discrawl tui --guilds 123,456 --author 1456464433768300635
|
||||
discrawl tui --dm
|
||||
discrawl --json tui --limit 50
|
||||
```
|
||||
|
||||
## What it shows
|
||||
|
||||
The browser uses the shared crawlkit explorer:
|
||||
|
||||
- left pane: channel, person, or thread groups
|
||||
- middle pane: newest matching message rows
|
||||
- right pane: selected message detail, attachments, replies, and thread context
|
||||
- footer: local DB or remote Git snapshot source
|
||||
|
||||
Mouse selection, right-click actions, sortable headers, refresh, and chat layout match the other crawlkit-backed archive tools.
|
||||
|
||||
## Flags
|
||||
|
||||
- `--guild <id>` / `--guilds <id,id>` - restrict the guild scope
|
||||
- `--dm` - browse local direct messages under the synthetic `@me` guild
|
||||
- `--channel <id|name|#name>` - restrict to one channel or DM conversation
|
||||
- `--author <id|name>` - restrict to one author
|
||||
- `--limit <n>` - newest rows to load (default 200)
|
||||
- `--include-empty` - include rows with no displayable/searchable content
|
||||
- `--json` - print crawlkit browser rows as JSON instead of opening the TUI
|
||||
|
||||
## Notes
|
||||
|
||||
- `tui` is read-only.
|
||||
- without `--guild`, `--guilds`, or `--dm`, it uses `default_guild_id` when configured; otherwise it can browse all stored guild rows
|
||||
- `--dm` only shows messages imported from the local Discord Desktop cache by [`wiretap`](wiretap.html)
|
||||
- `--json` is useful for launchers and agents that want the same row shape without an interactive terminal
|
||||
|
||||
## See also
|
||||
|
||||
- [`messages`](messages.html)
|
||||
- [`dms`](dms.html)
|
||||
- [`wiretap`](wiretap.html)
|
||||
@ -1,36 +0,0 @@
|
||||
# `update`
|
||||
|
||||
Forces a Git snapshot pull and import.
|
||||
|
||||
Routine imports are delta-planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests. After a typical publish, only the changed tail shards are imported; unsafe table changes fall back to a full import.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl update
|
||||
discrawl update --repo ~/.discrawl/share --remote https://github.com/example/discord-archive.git
|
||||
discrawl update --with-embeddings
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--repo <path>` - local snapshot repo path (defaults to `[share].repo_path`)
|
||||
- `--remote <url>` - target Git remote (defaults to `[share].remote`)
|
||||
- `--branch <name>` - snapshot branch (defaults to `[share].branch`)
|
||||
- `--with-embeddings` - also import vectors that match your local `[search.embeddings]` identity
|
||||
|
||||
## When to use it
|
||||
|
||||
- you have `share.remote` configured and want a fresh shard-delta import before running a command that does not auto-update (`sync` does not auto-import unless `--update=auto` is passed)
|
||||
- you set `--no-auto-update` when subscribing and want to refresh on demand
|
||||
- a CI job already imported the latest snapshot but read commands still consider it stale
|
||||
|
||||
## How `sync` interacts
|
||||
|
||||
`discrawl sync` does **not** auto-import the share unless you pass `--update=auto` (import only when stale) or `--update=force` (always import). Routine live refreshes stay fast; explicit imports happen via `update`.
|
||||
|
||||
## See also
|
||||
|
||||
- [Git snapshots guide](../guides/git-snapshots.html)
|
||||
- [`subscribe`](subscribe.html)
|
||||
- [`sync`](sync.html)
|
||||
@ -1,47 +0,0 @@
|
||||
# `wiretap`
|
||||
|
||||
Imports classifiable Discord Desktop message payloads into the same local SQLite archive.
|
||||
|
||||
This is the path for searchable DMs because bot tokens cannot read personal direct messages.
|
||||
|
||||
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
discrawl wiretap
|
||||
discrawl wiretap --path "$HOME/Library/Application Support/discord"
|
||||
discrawl wiretap --dry-run
|
||||
discrawl wiretap --full-cache
|
||||
discrawl wiretap --watch-every 2m
|
||||
```
|
||||
|
||||
## Flags
|
||||
|
||||
- `--path <dir>` - override the desktop data directory (default: platform-specific Discord cache path)
|
||||
- `--dry-run` - report what would be imported without writing anything
|
||||
- `--full-cache` - exhaustive Chromium HTTP cache import for historical guild-cache archaeology (slower)
|
||||
- `--watch-every <duration>` - keep importing on a periodic loop
|
||||
- `--max-file-bytes <n>` - skip unusually large files (default 64 MiB)
|
||||
|
||||
## Notes
|
||||
|
||||
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
|
||||
- stores proven DMs under the synthetic guild id `@me`
|
||||
- `@me` rows stay local-only: they are never included in `publish` output, Git snapshot imports, or embedding snapshots
|
||||
- preserves existing local `@me` rows when importing a Git snapshot
|
||||
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
|
||||
- imports what Discord Desktop has cached locally, not complete live DM history
|
||||
- scans local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON, plus route-bearing Chromium HTTP cache entries by default
|
||||
- does not extract, store, or print Discord auth tokens
|
||||
|
||||
## Default desktop paths
|
||||
|
||||
- macOS: `~/Library/Application Support/discord`
|
||||
- Linux: `~/.config/discord`
|
||||
|
||||
## See also
|
||||
|
||||
- [Wiretap guide](../guides/wiretap.html)
|
||||
- [`dms`](dms.html)
|
||||
- [`sync`](sync.html)
|
||||
@ -1,77 +0,0 @@
|
||||
# Configuration
|
||||
|
||||
`discrawl init` writes a complete config so most users do not hand-edit anything initially. This page documents the full shape and override rules for when you do.
|
||||
|
||||
## File layout
|
||||
|
||||
```toml
|
||||
version = 1
|
||||
default_guild_id = ""
|
||||
guild_ids = []
|
||||
db_path = "~/.discrawl/discrawl.db"
|
||||
cache_dir = "~/.discrawl/cache"
|
||||
log_dir = "~/.discrawl/logs"
|
||||
|
||||
[discord]
|
||||
token_source = "env" # use "none" for Git-only read access
|
||||
token_env = "DISCORD_BOT_TOKEN"
|
||||
token_keyring_service = "discrawl"
|
||||
token_keyring_account = "discord_bot_token"
|
||||
|
||||
[sync]
|
||||
source = "both" # "discord" for bot-only sync, "wiretap" for desktop-cache-only import
|
||||
concurrency = 16
|
||||
repair_every = "6h"
|
||||
full_history = true
|
||||
attachment_text = true
|
||||
|
||||
[desktop]
|
||||
path = "~/.config/discord" # macOS default: "~/Library/Application Support/discord"
|
||||
max_file_bytes = 67108864
|
||||
full_cache = false
|
||||
|
||||
[search]
|
||||
default_mode = "fts"
|
||||
|
||||
[search.embeddings]
|
||||
enabled = false
|
||||
provider = "openai"
|
||||
model = "text-embedding-3-small"
|
||||
api_key_env = "OPENAI_API_KEY"
|
||||
batch_size = 64
|
||||
|
||||
[share]
|
||||
remote = ""
|
||||
repo_path = "~/.discrawl/share"
|
||||
branch = "main"
|
||||
auto_update = true
|
||||
stale_after = "15m"
|
||||
```
|
||||
|
||||
`concurrency` is auto-sized at `init` to `min(32, max(8, GOMAXPROCS*2))`.
|
||||
|
||||
## Token resolution
|
||||
|
||||
In order:
|
||||
|
||||
1. `DISCORD_BOT_TOKEN`, or the env var named in `discord.token_env`
|
||||
2. OS keyring item `discrawl` / `discord_bot_token`, or the configured keyring service/account
|
||||
|
||||
`discrawl` accepts either raw token text or a value prefixed with `Bot `. Normalization is automatic.
|
||||
|
||||
Set `discord.token_source = "keyring"` if you want to require keyring lookup and skip env entirely. Set it to `"none"` for a Git-only reader.
|
||||
|
||||
## Override rules
|
||||
|
||||
- `--config <path>` beats everything
|
||||
- `DISCRAWL_CONFIG=<path>` overrides the default config path
|
||||
- `discord.token_source = "none"` disables live Discord access for Git-only readers
|
||||
- `discord.token_source = "keyring"` skips env lookup
|
||||
- `DISCRAWL_NO_AUTO_UPDATE=1` disables Git snapshot auto-update for read commands in one process
|
||||
|
||||
## Notes
|
||||
|
||||
- `default_guild_id` is the implicit scope for `sync`, `tail`, `digest`, and `analytics` when `--guild` is not passed
|
||||
- `guild_ids` is reserved for explicit multi-guild fan-out; usually you do not set this directly
|
||||
- changing `[search.embeddings]` provider/model/input version retargets pending jobs and resets prior attempts; existing vectors for another identity remain in SQLite but are not used for semantic search
|
||||
- changing `db_path` does not migrate existing data; copy the file yourself if you want to keep history
|
||||
@ -1,6 +0,0 @@
|
||||
# Contact
|
||||
|
||||
Discord archive search and analysis tooling.
|
||||
|
||||
- Source: [github.com/openclaw/discrawl](https://github.com/openclaw/discrawl)
|
||||
- Issues: [github.com/openclaw/discrawl/issues](https://github.com/openclaw/discrawl/issues)
|
||||
@ -1,51 +0,0 @@
|
||||
# Data layout
|
||||
|
||||
Everything lives in one local SQLite file. Default path: `~/.discrawl/discrawl.db`.
|
||||
|
||||
## What is stored
|
||||
|
||||
- guild metadata
|
||||
- channels and threads in one table (Discord models threads as channels)
|
||||
- current member snapshot
|
||||
- canonical message rows
|
||||
- append-only message event records
|
||||
- FTS5 index rows
|
||||
- optional local embedding queue metadata and vectors
|
||||
|
||||
Messages imported from Discord Desktop use the same message, attachment, mention, and FTS paths as bot-synced messages.
|
||||
|
||||
## DMs
|
||||
|
||||
Proven DMs use the synthetic guild id `@me`. Unclassifiable desktop-cache payloads are skipped instead of being stored as unknown synthetic data.
|
||||
|
||||
## Attachments
|
||||
|
||||
Attachment binaries are not stored in SQLite. Only attachment metadata, filenames, and (optionally) extracted text are stored.
|
||||
|
||||
Set `sync.attachment_text = false` if you want to keep attachment metadata and filenames but disable attachment body fetches for text indexing.
|
||||
|
||||
## Multi-guild ready
|
||||
|
||||
The schema is multi-guild ready even when the common UX stays single-guild simple. Threads are stored as channels because that matches the Discord model. Archived threads are part of the sync surface.
|
||||
|
||||
## Schema migrations
|
||||
|
||||
SQLite schema migrations are versioned with `PRAGMA user_version`. Startup fails fast when the local DB schema is newer than the binary supports - that means your binary is older than the database.
|
||||
|
||||
## Querying directly
|
||||
|
||||
Run any query you want with read-only SQL:
|
||||
|
||||
```bash
|
||||
discrawl sql 'select count(*) as messages from messages'
|
||||
echo 'select guild_id, count(*) from messages group by guild_id' | discrawl sql -
|
||||
```
|
||||
|
||||
See [`sql`](../commands/sql.html).
|
||||
|
||||
## See also
|
||||
|
||||
- [`status`](../commands/status.html) - high-level archive status
|
||||
- [`channels`](../commands/channels.html) - channel directory
|
||||
- [`members`](../commands/members.html) - member directory
|
||||
- [Security](../security.html)
|
||||
@ -1,68 +0,0 @@
|
||||
# Embeddings
|
||||
|
||||
Embeddings are optional. FTS is the default search path and the primary verification target. Embeddings enrich recall in background batches; they do not block the hot sync path.
|
||||
|
||||
## Quick path
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="..."
|
||||
discrawl init --with-embeddings
|
||||
discrawl sync --with-embeddings
|
||||
discrawl embed --limit 1000
|
||||
discrawl search --mode semantic "launch checklist"
|
||||
discrawl search --mode hybrid "launch checklist"
|
||||
```
|
||||
|
||||
## Two-phase pipeline
|
||||
|
||||
1. **Queue** - `sync --with-embeddings` writes `embedding_jobs` rows for new messages, changed normalized text, and messages without an existing job. The embedding provider is **not** called in this phase.
|
||||
2. **Drain** - `discrawl embed` claims pending jobs with a short lock so overlapping runs do not process the same batch. It calls the configured provider, writes vectors to `message_embeddings` with provider, model, input version, dimensions, and binary vector data.
|
||||
|
||||
Behavior during drain:
|
||||
|
||||
- rate limits requeue the batch and stop that drain run cleanly
|
||||
- provider or validation failures retry up to three attempts before marking the job failed
|
||||
- messages with no normalized text are marked done and any stale vector for that message is removed
|
||||
|
||||
## Identity (provider, model, input version)
|
||||
|
||||
The identity is stored on each job and vector. If you change provider or model:
|
||||
|
||||
- pending jobs are retargeted to the new identity
|
||||
- prior attempts are reset
|
||||
- existing vectors for another identity remain in SQLite but are not used for semantic search
|
||||
|
||||
Use `--rebuild` when you want to regenerate vectors for the existing archive after a config change:
|
||||
|
||||
```bash
|
||||
discrawl embed --rebuild --limit 1000
|
||||
```
|
||||
|
||||
## Local provider example
|
||||
|
||||
```toml
|
||||
[search.embeddings]
|
||||
enabled = true
|
||||
provider = "ollama"
|
||||
model = "nomic-embed-text"
|
||||
```
|
||||
|
||||
With local providers, message and query embedding both happen on the same machine. With remote providers, message text is sent during `discrawl embed`, and search query text is sent during `--mode semantic` or `--mode hybrid` calls.
|
||||
|
||||
## Git snapshot interaction
|
||||
|
||||
By default, `publish` does not export embeddings. Use `--with-embeddings`:
|
||||
|
||||
```bash
|
||||
discrawl publish --with-embeddings --push
|
||||
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
|
||||
discrawl update --with-embeddings
|
||||
```
|
||||
|
||||
The snapshot stores vectors under `embeddings/<provider>/<model>/<input_version>/...` and records that identity in `manifest.json`. Only vectors for non-DM messages are exported. Import only restores matching embedding manifests, so an Ollama/nomic subscriber does not accidentally import OpenAI/text-embedding vectors. `embedding_jobs` is never exported; subscribers that want fresh local vectors run `discrawl embed --rebuild`. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
|
||||
|
||||
## See also
|
||||
|
||||
- [Search modes](search-modes.html)
|
||||
- [`embed`](../commands/embed.html)
|
||||
- [Configuration](../configuration.html)
|
||||
@ -1,84 +0,0 @@
|
||||
# Git-backed snapshots
|
||||
|
||||
Discrawl can publish the SQLite archive as sharded, compressed NDJSON snapshots in a private Git repo, then auto-import that repo before local read commands. This gives readers org memory without Discord credentials.
|
||||
|
||||
Snapshot packing/import and git mirror mechanics are shared through
|
||||
`crawlkit`. Discrawl still owns Discord-specific privacy policy: `@me` direct
|
||||
messages, wiretap sync state, and local-only desktop rows are excluded from
|
||||
published snapshots and are preserved locally on import.
|
||||
|
||||
## Publisher
|
||||
|
||||
```bash
|
||||
discrawl publish --remote https://github.com/example/discord-archive.git --push
|
||||
discrawl publish --readme path/to/discord-backup/README.md --push
|
||||
```
|
||||
|
||||
The publisher uses your existing bot-synced archive. It exports non-DM tables only.
|
||||
|
||||
## Subscriber
|
||||
|
||||
```bash
|
||||
discrawl subscribe https://github.com/example/discord-archive.git
|
||||
discrawl search "launch checklist"
|
||||
discrawl messages --channel general --hours 24
|
||||
```
|
||||
|
||||
`subscribe` is the Git-only setup path. It writes a config with `discord.token_source = "none"`, imports the snapshot, and does not require a Discord bot token. `sync` and `tail` remain disabled in this mode because they need live Discord access.
|
||||
|
||||
## Auto-update
|
||||
|
||||
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`):
|
||||
|
||||
```bash
|
||||
discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.git
|
||||
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
||||
```
|
||||
|
||||
`discrawl update` forces the same pull/import step manually. Snapshot imports are delta-planned from crawlkit shard fingerprints. Older manifests without those fields fall back to Git blob identity, so the common publish shape only imports the changed message tail shard plus small cursor tables. Unsafe table-shape changes still fall back to a full import.
|
||||
|
||||
`discrawl sync` does **not** auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
||||
|
||||
## Hybrid mode
|
||||
|
||||
Keep normal Discord credentials configured **and** set `share.remote`:
|
||||
|
||||
```bash
|
||||
discrawl sync --update=auto # import snapshot delta first, then live deltas
|
||||
discrawl messages --sync # blocking pre-query sync for matched scope
|
||||
discrawl sync --all-channels # broader live repair
|
||||
discrawl sync --full # historical backfill
|
||||
```
|
||||
|
||||
## What is published
|
||||
|
||||
- non-DM archive tables (DM `@me` rows are always excluded)
|
||||
- README activity block - latest update time, latest archived message, archive totals, day/week/month activity
|
||||
- `embedding_jobs` is never exported
|
||||
|
||||
## Backing up vectors
|
||||
|
||||
```bash
|
||||
discrawl publish --with-embeddings --push
|
||||
discrawl subscribe --with-embeddings https://github.com/example/discord-archive.git
|
||||
discrawl update --with-embeddings
|
||||
```
|
||||
|
||||
Vectors are stored under `embeddings/<provider>/<model>/<input_version>/...`. Import only restores matching identities; Ollama/nomic subscribers do not accidentally pick up OpenAI/text-embedding vectors. Publishing without `--with-embeddings` omits embedding manifests instead of carrying forward an older bundle.
|
||||
|
||||
## CI
|
||||
|
||||
The Docker smoke test installs `discrawl` in a clean Go container, subscribes to a Git snapshot repo, then checks `search`, `messages`, `sql`, and `report`:
|
||||
|
||||
```bash
|
||||
DISCRAWL_DOCKER_TEST=1 go test ./internal/cli -run TestDockerGitSourceSmoke -count=1
|
||||
```
|
||||
|
||||
The backup workflows restore and save `.discrawl-ci/discrawl.db` with `actions/cache`. On a warm runner cache, scheduled publishers skip the pre-sync snapshot import and go straight to the live latest-message delta before publishing. Cache misses still import the latest published snapshot first so `--latest-only` has channel cursors to resume from.
|
||||
|
||||
## See also
|
||||
|
||||
- [`publish`](../commands/publish.html)
|
||||
- [`subscribe`](../commands/subscribe.html)
|
||||
- [`update`](../commands/update.html)
|
||||
- [`report`](../commands/report.html)
|
||||
@ -1,57 +0,0 @@
|
||||
# Search modes
|
||||
|
||||
`discrawl search` has three modes. FTS is the default and works with no embeddings.
|
||||
|
||||
## Modes
|
||||
|
||||
- **`fts`** (default) - searches the local SQLite FTS5 index, returns newest matching messages first
|
||||
- **`semantic`** - embeds the query, scores against locally stored message vectors; errors out cleanly if embeddings are disabled or no compatible vectors exist
|
||||
- **`hybrid`** - runs FTS and semantic, deduplicates by message id, falls back to FTS when semantic is unavailable
|
||||
|
||||
## FTS details
|
||||
|
||||
- backed by SQLite FTS5 with the default `unicode61` tokenizer
|
||||
- user query terms are parameterized and quoted before `MATCH`, so tokens like `AND`, `OR`, `NOT`, `NEAR`, and `*` are searched as input terms instead of FTS operators
|
||||
- punctuation still follows FTS5 tokenization rules
|
||||
- by default, `search` skips rows with no searchable content (attachment text, attachment filenames, embeds, and replies still count as content); use `--include-empty` to opt back in
|
||||
|
||||
## Semantic and hybrid prerequisites
|
||||
|
||||
- `[search.embeddings]` configured in `~/.discrawl/config.toml`
|
||||
- local `message_embeddings` rows for the configured provider, model, and input version
|
||||
- input version is currently `message_normalized_v1`, so vectors are tied to normalized message text rather than raw Discord payloads
|
||||
|
||||
Two-phase embedding creation:
|
||||
|
||||
1. `discrawl sync --with-embeddings` queues changed messages by writing `embedding_jobs` rows. New messages, changed normalized text, and messages without an existing job are queued. This phase does not call the embedding provider.
|
||||
2. `discrawl embed` drains pending jobs in bounded batches, calls the configured provider, and writes vectors to `message_embeddings`.
|
||||
|
||||
## Provider/model identity
|
||||
|
||||
The provider/model/input-version identity is stored on each job and vector. If you change provider or model, pending jobs are retargeted to the new identity and prior attempts are reset. Existing vectors for another identity remain in SQLite, but semantic search only reads vectors compatible with the current config.
|
||||
|
||||
Use `--rebuild` when changing provider, model, or input settings and you want to regenerate vectors for the existing archive.
|
||||
|
||||
## Local vs remote providers
|
||||
|
||||
Local providers like Ollama keep both message and query embedding on the same machine. With remote providers (OpenAI, etc.), message text is sent during `discrawl embed`, and search query text is sent when using `--mode semantic` or `--mode hybrid`. Stored message text is not sent during local vector scoring.
|
||||
|
||||
## Examples
|
||||
|
||||
```bash
|
||||
discrawl search "panic: nil pointer"
|
||||
discrawl search --mode fts "panic: nil pointer"
|
||||
discrawl search --mode semantic "missing launch checklist"
|
||||
discrawl search --mode hybrid "database timeout"
|
||||
discrawl search --guild 123456789012345678 "payment failed"
|
||||
discrawl search --dm "launch checklist"
|
||||
discrawl search --channel billing --author steipete --limit 50 "invoice"
|
||||
discrawl search --include-empty "GitHub"
|
||||
discrawl --json search "websocket closed"
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
- [`search`](../commands/search.html)
|
||||
- [`embed`](../commands/embed.html)
|
||||
- [Embeddings](embeddings.html)
|
||||
@ -1,57 +0,0 @@
|
||||
# Sync sources
|
||||
|
||||
Discrawl fills its local archive from two sources: the Discord bot API and the local Discord Desktop cache. Either or both can run in a single `sync`.
|
||||
|
||||
## Sources
|
||||
|
||||
| Source | Reads from | Stores |
|
||||
| --- | --- | --- |
|
||||
| `both` | Discord bot API and local Discord Desktop cache | bot-visible guild data plus classifiable cached desktop messages |
|
||||
| `discord` / `key` / `bot` / `api` | Discord bot API | guilds, channels, threads, members, and messages the bot can access |
|
||||
| `wiretap` / `desktop` / `cache` | local Discord Desktop cache files | classifiable cached messages; proven DMs are stored under `@me` |
|
||||
|
||||
The default is `both`. Pick one with `--source` or by setting `[sync].source` in config.
|
||||
|
||||
## Bot sync modes
|
||||
|
||||
Sync modes control the Discord bot API side of a run. When `wiretap` is selected, the desktop cache import runs once alongside the chosen bot sync mode.
|
||||
|
||||
| Command | Use when | Behavior |
|
||||
| --- | --- | --- |
|
||||
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored latest cursor |
|
||||
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, usually as a changed-shard delta, then runs the routine live refresh |
|
||||
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
|
||||
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
|
||||
|
||||
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
|
||||
|
||||
## Concurrency
|
||||
|
||||
`sync` already uses parallel channel workers for bot API message crawling. The default is auto-sized from `GOMAXPROCS` with a floor of `8` and a cap of `32`. Override with `--concurrency`.
|
||||
|
||||
## Targeting
|
||||
|
||||
- `--guild <id>` runs only that guild
|
||||
- `--guilds 123,456` runs an explicit set
|
||||
- `--all` ignores `default_guild_id` and fans out across every discovered guild
|
||||
- `--channels 111,222` targets specific channels (forum ids expand to their threads)
|
||||
- `--since <RFC3339>` limits initial history and `--full` backfill to messages at or after the timestamp; older history is not marked complete, so a later `sync --full` without `--since` can continue the backfill
|
||||
|
||||
## Performance and resilience
|
||||
|
||||
- Long runs emit periodic progress logs to stderr.
|
||||
- If in-flight channels stop completing for a while, `discrawl` emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters.
|
||||
- Every run ends with a `message sync finished` summary.
|
||||
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried on the next sync.
|
||||
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
|
||||
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
|
||||
- Full sync member refresh is best-effort and gives up after five minutes without a caller-supplied deadline, so message sync completion is not held hostage by a slow guild member crawl.
|
||||
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
||||
- If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
||||
|
||||
## See also
|
||||
|
||||
- [`sync`](../commands/sync.html)
|
||||
- [`tail`](../commands/tail.html)
|
||||
- [Wiretap](wiretap.html)
|
||||
- [Git snapshots](git-snapshots.html)
|
||||
@ -1,61 +0,0 @@
|
||||
# Desktop wiretap
|
||||
|
||||
`wiretap` imports classifiable Discord Desktop message payloads into the same local SQLite archive used by bot sync. It is the path for searchable DMs because bot tokens cannot read personal direct messages.
|
||||
|
||||
`wiretap` is also available through `discrawl sync --source wiretap` and is included in the default `discrawl sync --source both` path.
|
||||
|
||||
## What it does
|
||||
|
||||
- stores classifiable cache messages in the same `guilds`, `channels`, and `messages` tables used by bot sync
|
||||
- stores proven DMs under the synthetic guild id `@me`
|
||||
- preserves existing local `@me` guilds, channels, messages, and attachments when importing a Git snapshot, so a shared guild mirror refresh does not wipe local wiretap DM search
|
||||
- drops message payloads whose channel cannot be classified from cached channel metadata or Discord route URLs; dropped rows are counted as `skipped_messages`
|
||||
- imports what Discord Desktop has cached locally - not complete live DM history
|
||||
|
||||
## What it does not do
|
||||
|
||||
- does not extract, store, or print Discord auth tokens
|
||||
- does not use a user token
|
||||
- does not call the Discord API as your user
|
||||
- does not run as a selfbot
|
||||
|
||||
## DM privacy: `@me` stays local
|
||||
|
||||
`@me` rows are local-only. Excluded from:
|
||||
|
||||
- `publish` (Git snapshot output)
|
||||
- `subscribe` / Git snapshot import
|
||||
- `--with-embeddings` snapshot export
|
||||
|
||||
Excluded categories: DM guilds, channels, messages, events, attachments, mentions, wiretap sync state, and vectors for DM messages.
|
||||
|
||||
## What gets scanned
|
||||
|
||||
- local `.ldb`, `.log`, `.json`, and `.txt` artifacts for Discord message JSON
|
||||
- route-bearing Chromium HTTP cache entries by default
|
||||
- `--full-cache` (or `desktop.full_cache = true`) enables exhaustive Chromium cache import for slower historical guild-cache archaeology
|
||||
- `--max-file-bytes` skips unusually large files (default 64 MiB)
|
||||
|
||||
## Flags
|
||||
|
||||
```bash
|
||||
discrawl wiretap
|
||||
discrawl wiretap --path "$HOME/Library/Application Support/discord"
|
||||
discrawl wiretap --dry-run
|
||||
discrawl wiretap --full-cache
|
||||
discrawl wiretap --watch-every 2m
|
||||
```
|
||||
|
||||
`--watch-every` keeps the import running on a periodic loop. `--dry-run` reports what would be imported without writing anything.
|
||||
|
||||
## Default desktop paths
|
||||
|
||||
- macOS: `~/Library/Application Support/discord`
|
||||
- Linux: `~/.config/discord`
|
||||
- override via `--path` or `[desktop].path`
|
||||
|
||||
## See also
|
||||
|
||||
- [`wiretap`](../commands/wiretap.html)
|
||||
- [`dms`](../commands/dms.html) - convenience layer over `@me`
|
||||
- [Sync sources](sync-sources.html)
|
||||
@ -1,13 +0,0 @@
|
||||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta http-equiv="refresh" content="0; url=README.html">
|
||||
<link rel="canonical" href="README.html">
|
||||
<title>Discrawl docs</title>
|
||||
</head>
|
||||
<body>
|
||||
<p><a href="README.html">Discrawl docs</a></p>
|
||||
</body>
|
||||
</html>
|
||||
@ -1,66 +0,0 @@
|
||||
# Install
|
||||
|
||||
Discrawl is a single Go binary. Install via Homebrew or build from source.
|
||||
|
||||
## Homebrew
|
||||
|
||||
```bash
|
||||
brew install steipete/tap/discrawl
|
||||
discrawl --version
|
||||
```
|
||||
|
||||
Homebrew taps `steipete/tap` automatically when you install by the full formula name.
|
||||
|
||||
## From source
|
||||
|
||||
Requires Go `1.26+`.
|
||||
|
||||
```bash
|
||||
git clone https://github.com/openclaw/discrawl.git
|
||||
cd discrawl
|
||||
go build -o bin/discrawl ./cmd/discrawl
|
||||
./bin/discrawl --version
|
||||
```
|
||||
|
||||
If you do not put `discrawl` on `PATH`, replace `discrawl` with `./bin/discrawl` in any example below.
|
||||
|
||||
## Quick start (with bot token)
|
||||
|
||||
```bash
|
||||
export DISCORD_BOT_TOKEN="your-bot-token"
|
||||
discrawl init
|
||||
discrawl doctor
|
||||
discrawl sync --full
|
||||
discrawl sync
|
||||
discrawl search "panic: nil pointer"
|
||||
discrawl tail
|
||||
```
|
||||
|
||||
`init` discovers accessible guilds and writes `~/.discrawl/config.toml`. If exactly one guild is available, it becomes the default automatically.
|
||||
|
||||
`doctor` verifies the config loads, the token resolves, the bot can reach the Gateway, and the local DB and FTS index are wired up.
|
||||
|
||||
## Quick start (Git-only reader)
|
||||
|
||||
No Discord credentials required. You read a private Git snapshot another machine published.
|
||||
|
||||
```bash
|
||||
discrawl subscribe https://github.com/example/discord-archive.git
|
||||
discrawl search "launch checklist"
|
||||
discrawl messages --channel general --hours 24
|
||||
```
|
||||
|
||||
`subscribe` writes a token-free config (`discord.token_source = "none"`) and imports the snapshot. Read commands auto-refresh when the local snapshot is older than `15m`.
|
||||
|
||||
## Default runtime paths
|
||||
|
||||
- config: `~/.discrawl/config.toml`
|
||||
- database: `~/.discrawl/discrawl.db`
|
||||
- cache: `~/.discrawl/cache/`
|
||||
- logs: `~/.discrawl/logs/`
|
||||
|
||||
## Next steps
|
||||
|
||||
- [Bot setup](bot-setup.html) - intents, permissions, token sources
|
||||
- [Configuration](configuration.html) - the full TOML shape and override rules
|
||||
- [`sync`](commands/sync.html) - the main archive command
|
||||
@ -1,49 +0,0 @@
|
||||
# Security
|
||||
|
||||
## Tokens and credentials
|
||||
|
||||
- Do not commit bot tokens or API keys.
|
||||
- Default config lives in your home directory, not inside the repo.
|
||||
- Prefer env vars or the OS keyring for bot tokens.
|
||||
- `discrawl doctor` reports the token source (env or keyring), not token contents.
|
||||
|
||||
## Wiretap is local-only
|
||||
|
||||
`wiretap` reads local Discord Desktop cache files only. It does not:
|
||||
|
||||
- extract, store, or print Discord auth tokens
|
||||
- use a user token
|
||||
- call the Discord API as your user
|
||||
- run as a selfbot
|
||||
|
||||
Wiretap DM messages stay local. They are stored under the synthetic guild id `@me` and are never included in:
|
||||
|
||||
- `publish` (Git snapshot output)
|
||||
- `subscribe` / Git snapshot import
|
||||
- the optional `--with-embeddings` snapshot export
|
||||
|
||||
A shared guild mirror refresh does not wipe local wiretap DM search either - import preserves existing local `@me` guilds, channels, messages, and attachments.
|
||||
|
||||
## CI
|
||||
|
||||
CI runs secret scanning with `gitleaks` against git history and the working tree.
|
||||
|
||||
## What is stored locally
|
||||
|
||||
- guild metadata
|
||||
- channels and threads (one table)
|
||||
- current member snapshot
|
||||
- canonical message rows
|
||||
- append-only message event records
|
||||
- FTS index rows
|
||||
- optional local embedding queue metadata and vectors
|
||||
|
||||
Attachment binaries are not stored in SQLite. Only attachment metadata, filenames, and (optionally) extracted text are stored.
|
||||
|
||||
Set `sync.attachment_text = false` if you want to keep attachment metadata and filenames but disable attachment body fetches for text indexing.
|
||||
|
||||
## What is sent over the wire
|
||||
|
||||
With remote embedding providers, message text is sent during `discrawl embed`, and search query text is sent when using `--mode semantic` or `--mode hybrid`. Stored message text is not sent during local vector scoring.
|
||||
|
||||
Local providers like Ollama keep both message and query embedding on the same machine.
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 146 KiB |
@ -1,79 +0,0 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="630" viewBox="0 0 1200 630" role="img" aria-labelledby="title desc">
|
||||
<title id="title">Discrawl social card</title>
|
||||
<desc id="desc">Discrawl mirrors Discord into SQLite for local search and analysis.</desc>
|
||||
<defs>
|
||||
<linearGradient id="bg" x1="0" y1="0" x2="1" y2="1">
|
||||
<stop offset="0" stop-color="#0b0f16"/>
|
||||
<stop offset="0.58" stop-color="#111723"/>
|
||||
<stop offset="1" stop-color="#151827"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="accent" x1="0" y1="0" x2="1" y2="0">
|
||||
<stop offset="0" stop-color="#5fe3d4"/>
|
||||
<stop offset="0.56" stop-color="#a594ff"/>
|
||||
<stop offset="1" stop-color="#f364a2"/>
|
||||
</linearGradient>
|
||||
<linearGradient id="terminal" x1="0" y1="0" x2="0" y2="1">
|
||||
<stop offset="0" stop-color="#161d2a"/>
|
||||
<stop offset="1" stop-color="#0f141d"/>
|
||||
</linearGradient>
|
||||
<filter id="shadow" x="-10%" y="-15%" width="120%" height="130%">
|
||||
<feDropShadow dx="0" dy="22" stdDeviation="22" flood-color="#000000" flood-opacity="0.45"/>
|
||||
</filter>
|
||||
<filter id="softGlow" x="-40%" y="-40%" width="180%" height="180%">
|
||||
<feGaussianBlur stdDeviation="36"/>
|
||||
</filter>
|
||||
</defs>
|
||||
|
||||
<rect width="1200" height="630" fill="url(#bg)"/>
|
||||
<circle cx="1030" cy="92" r="210" fill="#5fe3d4" opacity="0.11" filter="url(#softGlow)"/>
|
||||
<circle cx="104" cy="568" r="240" fill="#f364a2" opacity="0.10" filter="url(#softGlow)"/>
|
||||
<path d="M0 515 C190 438 330 548 512 472 S874 330 1200 410 L1200 630 L0 630 Z" fill="#0a0d13" opacity="0.55"/>
|
||||
<path d="M0 534 C206 456 338 570 520 492 S884 360 1200 438" fill="none" stroke="url(#accent)" stroke-width="3" opacity="0.44"/>
|
||||
|
||||
<g transform="translate(72 70)">
|
||||
<rect x="0" y="0" width="112" height="112" rx="22" fill="#0c0f14" stroke="#253244" stroke-width="2"/>
|
||||
<rect x="23" y="28" width="66" height="47" rx="5" fill="none" stroke="#5fe3d4" stroke-width="4"/>
|
||||
<line x1="23" y1="43" x2="89" y2="43" stroke="#5fe3d4" stroke-width="3"/>
|
||||
<circle cx="33" cy="36" r="2.8" fill="#f364a2"/>
|
||||
<circle cx="43" cy="36" r="2.8" fill="#f7c177"/>
|
||||
<circle cx="53" cy="36" r="2.8" fill="#5fe3d4"/>
|
||||
<text x="31" y="59" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="10" font-weight="800" fill="#5fe3d4">SELECT</text>
|
||||
<text x="31" y="71" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="10" font-weight="800" fill="#aab3c1">msgs</text>
|
||||
<rect x="23" y="84" width="66" height="6" rx="3" fill="#161b24"/>
|
||||
<rect x="23" y="84" width="42" height="6" rx="3" fill="#5fe3d4"/>
|
||||
</g>
|
||||
|
||||
<text x="205" y="126" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="28" font-weight="800" letter-spacing="2" fill="#5fe3d4">discrawl.sh</text>
|
||||
<text x="72" y="248" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="96" font-weight="800" letter-spacing="-3" fill="#edf4fb">Discord history,</text>
|
||||
<text x="72" y="346" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="96" font-weight="800" letter-spacing="-3" fill="#edf4fb">local answers.</text>
|
||||
<text x="74" y="410" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="30" font-weight="560" fill="#aab3c1">Mirror Discord into SQLite.</text>
|
||||
<text x="74" y="450" font-family="Inter, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="30" font-weight="560" fill="#aab3c1">Search, query, tail, and analyze locally.</text>
|
||||
|
||||
<g transform="translate(72 505)">
|
||||
<rect x="0" y="0" width="210" height="54" rx="10" fill="#5fe3d4"/>
|
||||
<text x="28" y="35" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="900" fill="#081016">discrawl sync</text>
|
||||
<rect x="230" y="0" width="228" height="54" rx="10" fill="#151d29" stroke="#263448" stroke-width="2"/>
|
||||
<text x="258" y="35" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#f364a2">discrawl search</text>
|
||||
</g>
|
||||
|
||||
<g transform="translate(742 135)" filter="url(#shadow)">
|
||||
<rect x="0" y="0" width="386" height="330" rx="20" fill="url(#terminal)" stroke="#263448" stroke-width="2"/>
|
||||
<rect x="0" y="0" width="386" height="54" rx="20" fill="#121925"/>
|
||||
<path d="M0 34 Q0 0 34 0 H352 Q386 0 386 34 V54 H0 Z" fill="#121925"/>
|
||||
<circle cx="30" cy="27" r="7" fill="#f364a2"/>
|
||||
<circle cx="54" cy="27" r="7" fill="#f7c177"/>
|
||||
<circle cx="78" cy="27" r="7" fill="#5fe3d4"/>
|
||||
<text x="112" y="34" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="16" font-weight="800" fill="#657287">sqlite archive</text>
|
||||
<text x="28" y="95" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#5fe3d4">$ discrawl wiretap</text>
|
||||
<text x="28" y="132" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#6f7b8d">dm cache imported: 814</text>
|
||||
<text x="28" y="180" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="20" font-weight="800" fill="#5fe3d4">$ discrawl sql</text>
|
||||
<text x="28" y="218" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">312k messages</text>
|
||||
<text x="28" y="255" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">14k attachments</text>
|
||||
<text x="28" y="292" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="650" fill="#edf4fb">FTS5 ready</text>
|
||||
<rect x="286" y="260" width="72" height="10" rx="5" fill="#263448"/>
|
||||
<rect x="312" y="282" width="46" height="10" rx="5" fill="#263448"/>
|
||||
<rect x="298" y="304" width="60" height="10" rx="5" fill="#263448"/>
|
||||
</g>
|
||||
|
||||
<text x="72" y="600" font-family="JetBrains Mono, Menlo, Consolas, monospace" font-size="18" font-weight="800" fill="#657287">bot sync + desktop wiretap + FTS5 + semantic search</text>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 5.9 KiB |
48
go.mod
48
go.mod
@ -1,58 +1,28 @@
|
||||
module github.com/openclaw/discrawl
|
||||
module github.com/steipete/discrawl
|
||||
|
||||
go 1.26.3
|
||||
go 1.26.2
|
||||
|
||||
require (
|
||||
github.com/bwmarrin/discordgo v0.29.0
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/pelletier/go-toml/v2 v2.3.0
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/zalando/go-keyring v0.2.8
|
||||
golang.org/x/sys v0.43.0
|
||||
golang.org/x/text v0.36.0
|
||||
golang.org/x/text v0.35.0
|
||||
modernc.org/sqlite v1.49.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/charmbracelet/bubbles v1.0.0 // indirect
|
||||
github.com/clipperhouse/displaywidth v0.11.0 // indirect
|
||||
github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
|
||||
modernc.org/sqlite v1.50.0 // indirect
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
|
||||
github.com/charmbracelet/bubbletea v1.3.10 // indirect
|
||||
github.com/charmbracelet/colorprofile v0.4.1 // indirect
|
||||
github.com/charmbracelet/lipgloss v1.1.0 // indirect
|
||||
github.com/charmbracelet/x/ansi v0.11.7 // indirect
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
|
||||
github.com/charmbracelet/x/term v0.2.2 // indirect
|
||||
github.com/danieljoos/wincred v1.2.3 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
|
||||
github.com/godbus/dbus/v5 v5.2.2 // indirect
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.22 // indirect
|
||||
github.com/mattn/go-localereader v0.0.1 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.23 // indirect
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||
github.com/muesli/termenv v0.16.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/openclaw/crawlkit v0.5.0
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||
golang.org/x/crypto v0.50.0 // indirect
|
||||
golang.org/x/tools v0.44.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
golang.org/x/crypto v0.49.0 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
modernc.org/libc v1.72.1 // indirect
|
||||
modernc.org/libc v1.72.0 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
118
go.sum
118
go.sum
@ -1,38 +1,11 @@
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
|
||||
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
|
||||
github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
|
||||
github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
|
||||
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
|
||||
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
|
||||
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
|
||||
github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4=
|
||||
github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk=
|
||||
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
|
||||
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
|
||||
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
|
||||
github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
|
||||
github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
|
||||
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
|
||||
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
|
||||
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
|
||||
github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
|
||||
github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
|
||||
github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
|
||||
github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
|
||||
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
|
||||
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
|
||||
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
|
||||
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 h1:EwtI+Al+DeppwYX2oXJCETMO23COyaKGP6fHVpkpWpg=
|
||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936/go.mod h1:MxpfABSjhmINe3F1It9d+8exIHFvUqtLIRCdOGNXqiI=
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
|
||||
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
@ -40,80 +13,45 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
|
||||
github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
||||
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
|
||||
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
|
||||
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
||||
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
||||
github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw=
|
||||
github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
||||
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
||||
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
|
||||
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
|
||||
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/openclaw/crawlkit v0.5.0 h1:sVqIbQ5v6LiOf+NXcVj93UhfoaJqMbBlrd1lU6uhO9M=
|
||||
github.com/openclaw/crawlkit v0.5.0/go.mod h1:/AI8o/DeRqXPZJPHq/9mGUjNzLPskm/wTjikRPxEdHY=
|
||||
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
|
||||
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||
github.com/pelletier/go-toml/v2 v2.3.0 h1:k59bC/lIZREW0/iVaQR8nDHxVq8OVlIzYCOJf421CaM=
|
||||
github.com/pelletier/go-toml/v2 v2.3.0/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
|
||||
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
|
||||
github.com/zalando/go-keyring v0.2.8/go.mod h1:tsMo+VpRq5NGyKfxoBVjCuMrG47yj8cmakZDO5QGii0=
|
||||
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
|
||||
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
|
||||
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
|
||||
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
|
||||
golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM=
|
||||
golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU=
|
||||
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
|
||||
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
|
||||
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
|
||||
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
|
||||
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
|
||||
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c=
|
||||
golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI=
|
||||
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
|
||||
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
modernc.org/cc/v4 v4.28.1 h1:XpLbkYVQ24E8tX5u8+yWGvaxerxkR/S4zqxI8ZoSBuc=
|
||||
modernc.org/cc/v4 v4.28.1/go.mod h1:OnovgIhbbMXMu1aISnJ0wvVD1KnW+cAUJkIrAWh+kVI=
|
||||
modernc.org/ccgo/v4 v4.33.0 h1:dspBCm75jsj8Y/ufwAMVfe375L2iYdMyQ2QG/v3hL54=
|
||||
modernc.org/ccgo/v4 v4.33.0/go.mod h1:+RhXBoRYzRwaH21mV/aj6XvQRDtfjcZfAlPMsQo8CR0=
|
||||
modernc.org/cc/v4 v4.27.3 h1:uNCgn37E5U09mTv1XgskEVUJ8ADKpmFMPxzGJ0TSo+U=
|
||||
modernc.org/cc/v4 v4.27.3/go.mod h1:3YjcbCqhoTTHPycJDRl2WZKKFj0nwcOIPBfEZK0Hdk8=
|
||||
modernc.org/ccgo/v4 v4.32.4 h1:L5OB8rpEX4ZsXEQwGozRfJyJSFHbbNVOoQ59DU9/KuU=
|
||||
modernc.org/ccgo/v4 v4.32.4/go.mod h1:lY7f+fiTDHfcv6YlRgSkxYfhs+UvOEEzj49jAn2TOx0=
|
||||
modernc.org/fileutil v1.4.0 h1:j6ZzNTftVS054gi281TyLjHPp6CPHr2KCxEXjEbD6SM=
|
||||
modernc.org/fileutil v1.4.0/go.mod h1:EqdKFDxiByqxLk8ozOxObDSfcVOv/54xDs/DUHdvCUU=
|
||||
modernc.org/gc/v2 v2.6.5 h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI=
|
||||
@ -122,18 +60,18 @@ modernc.org/gc/v3 v3.1.2 h1:ZtDCnhonXSZexk/AYsegNRV1lJGgaNZJuKjJSWKyEqo=
|
||||
modernc.org/gc/v3 v3.1.2/go.mod h1:HFK/6AGESC7Ex+EZJhJ2Gni6cTaYpSMmU/cT9RmlfYY=
|
||||
modernc.org/goabi0 v0.2.0 h1:HvEowk7LxcPd0eq6mVOAEMai46V+i7Jrj13t4AzuNks=
|
||||
modernc.org/goabi0 v0.2.0/go.mod h1:CEFRnnJhKvWT1c1JTI3Avm+tgOWbkOu5oPA8eH8LnMI=
|
||||
modernc.org/libc v1.72.1 h1:db1xwJ6u1kE3KHTFTTbe2GCrczHPKzlURP0aDC4NGD0=
|
||||
modernc.org/libc v1.72.1/go.mod h1:HRMiC/PhPGLIPM7GzAFCbI+oSgE3dhZ8FWftmRrHVlY=
|
||||
modernc.org/libc v1.72.0 h1:IEu559v9a0XWjw0DPoVKtXpO2qt5NVLAnFaBbjq+n8c=
|
||||
modernc.org/libc v1.72.0/go.mod h1:tTU8DL8A+XLVkEY3x5E/tO7s2Q/q42EtnNWda/L5QhQ=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
||||
modernc.org/opt v0.2.0 h1:tGyef5ApycA7FSEOMraay9SaTk5zmbx7Tu+cJs4QKZg=
|
||||
modernc.org/opt v0.2.0/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8=
|
||||
modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns=
|
||||
modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w=
|
||||
modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE=
|
||||
modernc.org/sqlite v1.50.0 h1:eMowQSWLK0MeiQTdmz3lqoF5dqclujdlIKeJA11+7oM=
|
||||
modernc.org/sqlite v1.50.0/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
|
||||
modernc.org/sqlite v1.49.1 h1:dYGHTKcX1sJ+EQDnUzvz4TJ5GbuvhNJa8Fg6ElGx73U=
|
||||
modernc.org/sqlite v1.49.1/go.mod h1:m0w8xhwYUVY3H6pSDwc3gkJ/irZT/0YEXwBlhaxQEew=
|
||||
modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0=
|
||||
modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A=
|
||||
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
|
||||
|
||||
@ -2,7 +2,6 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -13,13 +12,12 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/crawlkit/embed"
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/discord"
|
||||
"github.com/openclaw/discrawl/internal/discorddesktop"
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
"github.com/steipete/discrawl/internal/discord"
|
||||
"github.com/steipete/discrawl/internal/discorddesktop"
|
||||
"github.com/steipete/discrawl/internal/embed"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
type syncSources struct {
|
||||
@ -37,6 +35,8 @@ type syncRunStats struct {
|
||||
func (r *runtime) runInit(args []string) error {
|
||||
fs := flag.NewFlagSet("init", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
fromOpenClaw := fs.String("from-openclaw", "", "")
|
||||
account := fs.String("account", "", "")
|
||||
guildID := fs.String("guild", "", "")
|
||||
dbPath := fs.String("db", "", "")
|
||||
withEmbeddings := fs.Bool("with-embeddings", false, "")
|
||||
@ -44,6 +44,12 @@ func (r *runtime) runInit(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
cfg := config.Default()
|
||||
if *fromOpenClaw != "" {
|
||||
cfg.Discord.OpenClawConfig = *fromOpenClaw
|
||||
}
|
||||
if *account != "" {
|
||||
cfg.Discord.Account = *account
|
||||
}
|
||||
if *dbPath != "" {
|
||||
cfg.DBPath = *dbPath
|
||||
}
|
||||
@ -83,6 +89,10 @@ func (r *runtime) runInit(args []string) error {
|
||||
}
|
||||
if *guildID != "" {
|
||||
cfg.DefaultGuildID = *guildID
|
||||
} else if info, err := config.LoadOpenClawDiscord(cfg.Discord.OpenClawConfig, cfg.Discord.Account); err == nil {
|
||||
if len(info.GuildIDs) == 1 {
|
||||
cfg.DefaultGuildID = info.GuildIDs[0]
|
||||
}
|
||||
}
|
||||
if cfg.DefaultGuildID == "" && len(cfg.GuildIDs) == 1 {
|
||||
cfg.DefaultGuildID = cfg.GuildIDs[0]
|
||||
@ -114,19 +124,9 @@ func (r *runtime) runSync(args []string) error {
|
||||
latestOnly := fs.Bool("latest-only", false, "")
|
||||
guildsFlag := fs.String("guilds", "", "")
|
||||
guildFlag := fs.String("guild", "", "")
|
||||
updateMode := fs.String("update", "", "")
|
||||
noUpdate := fs.Bool("no-update", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if *noUpdate && strings.TrimSpace(*updateMode) != "" && !strings.EqualFold(strings.TrimSpace(*updateMode), string(shareUpdateNever)) {
|
||||
return usageErr(errors.New("use either --no-update or --update, not both"))
|
||||
}
|
||||
if strings.TrimSpace(*updateMode) != "" {
|
||||
if _, err := parseShareUpdateMode(*updateMode); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
}
|
||||
sources, err := parseSyncSources(*source)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
@ -143,7 +143,8 @@ func (r *runtime) runSync(args []string) error {
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
defaultLatest := defaultLatestSyncMode(*full, *allChannels, *since, *channels)
|
||||
defaultLatest := !*full && !*allChannels && *since == "" && *channels == ""
|
||||
latestMode := *latestOnly || defaultLatest
|
||||
opts := syncer.SyncOptions{
|
||||
Full: *full,
|
||||
GuildIDs: guildIDs,
|
||||
@ -151,18 +152,11 @@ func (r *runtime) runSync(args []string) error {
|
||||
Concurrency: *concurrency,
|
||||
Since: sinceTime,
|
||||
Embeddings: *withEmbeddings,
|
||||
SkipMembers: syncSkipsMembers(*skipMembers, defaultLatest),
|
||||
LatestOnly: syncLatestOnly(*latestOnly, defaultLatest),
|
||||
SkipMembers: *skipMembers || defaultLatest,
|
||||
LatestOnly: latestMode,
|
||||
}
|
||||
return r.withSyncLock(func() error {
|
||||
return r.runSyncLocked(sources, opts)
|
||||
})
|
||||
}
|
||||
|
||||
func (r *runtime) runSyncLocked(sources syncSources, opts syncer.SyncOptions) error {
|
||||
var apiStats *syncer.SyncStats
|
||||
if sources.discord {
|
||||
r.setSyncLockPhase("discord sync")
|
||||
shouldClose := r.client == nil
|
||||
if err := r.ensureDiscordServices(); err != nil {
|
||||
return err
|
||||
@ -178,11 +172,9 @@ func (r *runtime) runSyncLocked(sources syncSources, opts syncer.SyncOptions) er
|
||||
}
|
||||
var wiretapStats *discorddesktop.Stats
|
||||
if sources.wiretap {
|
||||
r.setSyncLockPhase("wiretap import")
|
||||
stats, err := discorddesktop.Import(r.ctx, r.store, discorddesktop.Options{
|
||||
Path: r.cfg.Desktop.Path,
|
||||
MaxFileBytes: r.cfg.Desktop.MaxFileBytes,
|
||||
FullCache: r.cfg.Desktop.FullCache,
|
||||
Now: r.now,
|
||||
})
|
||||
if err != nil {
|
||||
@ -199,18 +191,6 @@ func (r *runtime) runSyncLocked(sources syncSources, opts syncer.SyncOptions) er
|
||||
return r.print(syncRunStats{Source: sources.name, Discord: apiStats, Wiretap: wiretapStats})
|
||||
}
|
||||
|
||||
func defaultLatestSyncMode(full bool, allChannels bool, since string, channels string) bool {
|
||||
return !full && !allChannels && since == "" && channels == ""
|
||||
}
|
||||
|
||||
func syncLatestOnly(explicit bool, defaultLatest bool) bool {
|
||||
return explicit || defaultLatest
|
||||
}
|
||||
|
||||
func syncSkipsMembers(explicit bool, defaultLatest bool) bool {
|
||||
return explicit || defaultLatest
|
||||
}
|
||||
|
||||
func parseSyncSources(raw string) (syncSources, error) {
|
||||
normalized := strings.ToLower(strings.TrimSpace(raw))
|
||||
if normalized == "" {
|
||||
@ -264,23 +244,21 @@ func (r *runtime) runWiretap(args []string) error {
|
||||
fs.SetOutput(io.Discard)
|
||||
path := fs.String("path", r.cfg.Desktop.Path, "")
|
||||
maxFileBytes := fs.Int64("max-file-bytes", r.cfg.Desktop.MaxFileBytes, "")
|
||||
fullCache := fs.Bool("full-cache", r.cfg.Desktop.FullCache, "")
|
||||
dryRun := fs.Bool("dry-run", false, "")
|
||||
watchEvery := fs.Duration("watch-every", 0, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("wiretap takes flags only"))
|
||||
return usageErr(fmt.Errorf("wiretap takes flags only"))
|
||||
}
|
||||
if *maxFileBytes <= 0 {
|
||||
return usageErr(errors.New("--max-file-bytes must be positive"))
|
||||
return usageErr(fmt.Errorf("--max-file-bytes must be positive"))
|
||||
}
|
||||
runOnce := func(ctx context.Context) error {
|
||||
stats, err := discorddesktop.Import(ctx, r.store, discorddesktop.Options{
|
||||
Path: *path,
|
||||
MaxFileBytes: *maxFileBytes,
|
||||
FullCache: *fullCache,
|
||||
DryRun: *dryRun,
|
||||
Now: r.now,
|
||||
})
|
||||
@ -293,7 +271,7 @@ func (r *runtime) runWiretap(args []string) error {
|
||||
return runOnce(r.ctx)
|
||||
}
|
||||
if *watchEvery < time.Second {
|
||||
return usageErr(errors.New("--watch-every must be at least 1s"))
|
||||
return usageErr(fmt.Errorf("--watch-every must be at least 1s"))
|
||||
}
|
||||
ctx, stop := signal.NotifyContext(r.ctx, os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
@ -315,37 +293,16 @@ func (r *runtime) runWiretap(args []string) error {
|
||||
}
|
||||
|
||||
func (r *runtime) runStatus(args []string) error {
|
||||
fs := flag.NewFlagSet("status", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("status takes no arguments"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
if len(args) != 0 {
|
||||
return usageErr(fmt.Errorf("status takes no arguments"))
|
||||
}
|
||||
dbPath, err := config.ExpandPath(r.cfg.DBPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
status := store.Status{DBPath: dbPath, DefaultGuildID: r.cfg.EffectiveDefaultGuildID()}
|
||||
if r.store != nil {
|
||||
status, err = r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if r.json {
|
||||
needsUpdate := false
|
||||
if r.store != nil && r.cfg.ShareEnabled() {
|
||||
if staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter); err == nil {
|
||||
needsUpdate = share.NeedsImport(r.ctx, r.store, staleAfter)
|
||||
}
|
||||
}
|
||||
return r.print(controlStatus(r.configPath, r.cfg, status, needsUpdate))
|
||||
status, err := r.store.Status(r.ctx, dbPath, r.cfg.EffectiveDefaultGuildID())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(status)
|
||||
}
|
||||
@ -360,21 +317,21 @@ func (r *runtime) runEmbed(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("embed takes no positional arguments"))
|
||||
return usageErr(fmt.Errorf("embed takes no positional arguments"))
|
||||
}
|
||||
if *limit <= 0 {
|
||||
return usageErr(errors.New("--limit must be positive"))
|
||||
return usageErr(fmt.Errorf("--limit must be positive"))
|
||||
}
|
||||
if *batchSize <= 0 {
|
||||
return usageErr(errors.New("--batch-size must be positive"))
|
||||
return usageErr(fmt.Errorf("--batch-size must be positive"))
|
||||
}
|
||||
if !r.cfg.Search.Embeddings.Enabled {
|
||||
return usageErr(errors.New("embeddings are disabled in config"))
|
||||
return usageErr(fmt.Errorf("embeddings are disabled in config"))
|
||||
}
|
||||
providerFactory := r.newEmbed
|
||||
if providerFactory == nil {
|
||||
providerFactory = func(cfg config.EmbeddingsConfig) (embed.Provider, error) {
|
||||
return embed.NewProvider(crawlkitEmbeddingConfig(cfg))
|
||||
return embed.NewProvider(cfg)
|
||||
}
|
||||
}
|
||||
provider, err := providerFactory(r.cfg.Search.Embeddings)
|
||||
@ -406,17 +363,8 @@ func (r *runtime) runEmbed(args []string) error {
|
||||
}
|
||||
|
||||
func (r *runtime) runDoctor(args []string) error {
|
||||
fs := flag.NewFlagSet("doctor", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("doctor takes no arguments"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
if len(args) != 0 {
|
||||
return usageErr(fmt.Errorf("doctor takes no arguments"))
|
||||
}
|
||||
report := map[string]any{
|
||||
"config_path": r.configPath,
|
||||
@ -435,7 +383,7 @@ func (r *runtime) runDoctor(args []string) error {
|
||||
report["share_stale_after"] = cfg.Share.StaleAfter
|
||||
}
|
||||
if cfg.Search.Embeddings.Enabled {
|
||||
check := embed.CheckProvider(r.ctx, crawlkitEmbeddingConfig(cfg.Search.Embeddings))
|
||||
check := embed.CheckProvider(r.ctx, cfg.Search.Embeddings)
|
||||
report["embeddings"] = check.Status
|
||||
report["embeddings_provider"] = check.Provider
|
||||
report["embeddings_model"] = check.Model
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
)
|
||||
|
||||
func (r *runtime) runAnalytics(args []string) error {
|
||||
if len(args) == 0 {
|
||||
printAnalyticsUsage(r.stdout)
|
||||
return nil
|
||||
}
|
||||
|
||||
subcommand := strings.TrimSpace(args[0])
|
||||
subArgs := args[1:]
|
||||
switch subcommand {
|
||||
case "quiet":
|
||||
return r.withLocalStoreRead(true, func() error {
|
||||
return r.runAnalyticsQuiet(subArgs)
|
||||
})
|
||||
case "trends":
|
||||
return r.withLocalStoreRead(true, func() error {
|
||||
return r.runAnalyticsTrends(subArgs)
|
||||
})
|
||||
default:
|
||||
return usageErr(fmt.Errorf("unknown analytics subcommand %q", subcommand))
|
||||
}
|
||||
}
|
||||
|
||||
func printAnalyticsUsage(w io.Writer) {
|
||||
_, _ = fmt.Fprintln(w, "Usage: discrawl analytics <subcommand> [flags]")
|
||||
_, _ = fmt.Fprintln(w)
|
||||
_, _ = fmt.Fprintln(w, "Subcommands:")
|
||||
_, _ = fmt.Fprintln(w, " quiet Channels with no activity in the lookback window.")
|
||||
_, _ = fmt.Fprintln(w, " trends Week-over-week message counts per channel.")
|
||||
}
|
||||
|
||||
func (r *runtime) runAnalyticsQuiet(args []string) error {
|
||||
fs := flag.NewFlagSet("analytics quiet", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
since := fs.String("since", "30d", "")
|
||||
guild := fs.String("guild", "", "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("analytics quiet takes no positional arguments"))
|
||||
}
|
||||
|
||||
lookback, err := parseLookback(*since)
|
||||
if err != nil {
|
||||
return usageErr(fmt.Errorf("parse --since: %w", err))
|
||||
}
|
||||
guildID := strings.TrimSpace(*guild)
|
||||
if guildID == "" {
|
||||
guildID = r.cfg.EffectiveDefaultGuildID()
|
||||
}
|
||||
|
||||
quiet, err := report.BuildQuiet(r.ctx, r.store, report.QuietOptions{
|
||||
Since: lookback,
|
||||
GuildID: guildID,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(quiet)
|
||||
}
|
||||
|
||||
func (r *runtime) runAnalyticsTrends(args []string) error {
|
||||
fs := flag.NewFlagSet("analytics trends", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
weeks := fs.Int("weeks", 8, "")
|
||||
guild := fs.String("guild", "", "")
|
||||
channel := fs.String("channel", "", "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("analytics trends takes no positional arguments"))
|
||||
}
|
||||
if *weeks < 0 {
|
||||
return usageErr(errors.New("--weeks must be zero or greater"))
|
||||
}
|
||||
|
||||
guildID := strings.TrimSpace(*guild)
|
||||
if guildID == "" {
|
||||
guildID = r.cfg.EffectiveDefaultGuildID()
|
||||
}
|
||||
|
||||
trends, err := report.BuildTrends(r.ctx, r.store, report.TrendsOptions{
|
||||
Weeks: *weeks,
|
||||
GuildID: guildID,
|
||||
Channel: *channel,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(trends)
|
||||
}
|
||||
@ -1,216 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestAnalyticsCommand(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
|
||||
require.NoError(t, seedAnalyticsCLIStore(ctx, dbPath))
|
||||
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = dbPath
|
||||
cfg.DefaultGuildID = "g1"
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
t.Run("analytics with no subcommand prints usage", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "Usage: discrawl analytics <subcommand> [flags]")
|
||||
require.Contains(t, out.String(), "quiet")
|
||||
require.Contains(t, out.String(), "trends")
|
||||
})
|
||||
|
||||
t.Run("analytics quiet json schema", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
|
||||
|
||||
var payload map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
|
||||
require.Contains(t, payload, "generated_at")
|
||||
require.Contains(t, payload, "since")
|
||||
require.Contains(t, payload, "until")
|
||||
require.Contains(t, payload, "channels")
|
||||
|
||||
channels, ok := payload["channels"].([]any)
|
||||
require.True(t, ok)
|
||||
require.NotEmpty(t, channels)
|
||||
|
||||
first, ok := channels[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Contains(t, first, "channel_id")
|
||||
require.Contains(t, first, "channel_name")
|
||||
require.Contains(t, first, "guild_id")
|
||||
require.Contains(t, first, "days_silent")
|
||||
|
||||
totals, ok := payload["totals"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Contains(t, totals, "channels")
|
||||
})
|
||||
|
||||
t.Run("analytics quiet human output", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
|
||||
|
||||
text := out.String()
|
||||
require.Contains(t, text, "CHANNEL")
|
||||
require.Contains(t, text, "stale")
|
||||
require.Contains(t, text, "Window:")
|
||||
require.Contains(t, text, "Totals: channels=")
|
||||
})
|
||||
|
||||
t.Run("analytics quiet plain output", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--plain", "analytics", "quiet", "--since", "30d"}, &out, &bytes.Buffer{}))
|
||||
|
||||
require.Contains(t, out.String(), "c3\tstale\ttext\tg1\t")
|
||||
})
|
||||
|
||||
t.Run("analytics trends json schema", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
|
||||
|
||||
var payload map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
|
||||
require.InEpsilon(t, 4, payload["weeks"], 0.001)
|
||||
require.Contains(t, payload, "rows")
|
||||
|
||||
rows, ok := payload["rows"].([]any)
|
||||
require.True(t, ok)
|
||||
require.NotEmpty(t, rows)
|
||||
|
||||
first, ok := rows[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Contains(t, first, "channel_id")
|
||||
require.Contains(t, first, "channel_name")
|
||||
require.Contains(t, first, "weekly")
|
||||
|
||||
weekly := first["weekly"].([]any)
|
||||
require.Len(t, weekly, 4)
|
||||
weekRow := weekly[0].(map[string]any)
|
||||
require.Contains(t, weekRow, "week_start")
|
||||
require.Contains(t, weekRow, "messages")
|
||||
})
|
||||
|
||||
t.Run("analytics trends human output", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
|
||||
|
||||
text := out.String()
|
||||
require.Contains(t, text, "CHANNEL")
|
||||
require.Contains(t, text, "TOTAL")
|
||||
require.Contains(t, text, "general")
|
||||
require.Contains(t, text, "Window:")
|
||||
})
|
||||
|
||||
t.Run("analytics trends plain output", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--plain", "analytics", "trends", "--weeks", "4"}, &out, &bytes.Buffer{}))
|
||||
|
||||
require.Contains(t, out.String(), "g1\tc1\tgeneral\ttext\t")
|
||||
})
|
||||
|
||||
t.Run("unknown analytics subcommand returns usage error", func(t *testing.T) {
|
||||
err := Run(ctx, []string{"--config", cfgPath, "analytics", "unknown-sub"}, &bytes.Buffer{}, &bytes.Buffer{})
|
||||
require.Error(t, err)
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
})
|
||||
|
||||
t.Run("quiet validates its own flags", func(t *testing.T) {
|
||||
cases := [][]string{
|
||||
{"--config", cfgPath, "analytics", "quiet", "--bogus"},
|
||||
{"--config", cfgPath, "analytics", "quiet", "extra"},
|
||||
{"--config", cfgPath, "analytics", "trends", "--bogus"},
|
||||
{"--config", cfgPath, "analytics", "trends", "--weeks", "-1"},
|
||||
{"--config", cfgPath, "analytics", "trends", "extra"},
|
||||
}
|
||||
for _, args := range cases {
|
||||
err := Run(ctx, args, &bytes.Buffer{}, &bytes.Buffer{})
|
||||
require.Error(t, err)
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func seedAnalyticsCLIStore(ctx context.Context, path string) error {
|
||||
s, err := store.Open(ctx, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
now := time.Now().UTC()
|
||||
if err := s.UpsertGuild(ctx, store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g1", Kind: "text", Name: "incidents", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c3", GuildID: "g1", Kind: "text", Name: "stale", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c4", GuildID: "g1", Kind: "forum", Name: "never", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.UpsertMessages(ctx, []store.MessageMutation{
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Alice",
|
||||
CreatedAt: now.Add(-2 * time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "hello",
|
||||
NormalizedContent: "hello",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m2",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c2",
|
||||
ChannelName: "incidents",
|
||||
AuthorID: "u2",
|
||||
AuthorName: "Bob",
|
||||
CreatedAt: now.Add(-9 * 24 * time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "incident",
|
||||
NormalizedContent: "incident",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
},
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m3",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c3",
|
||||
ChannelName: "stale",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Alice",
|
||||
CreatedAt: now.Add(-45 * 24 * time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "old",
|
||||
NormalizedContent: "old",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
@ -11,12 +11,12 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/bwmarrin/discordgo"
|
||||
"github.com/openclaw/crawlkit/embed"
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/discord"
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
"github.com/steipete/discrawl/internal/discord"
|
||||
"github.com/steipete/discrawl/internal/embed"
|
||||
"github.com/steipete/discrawl/internal/share"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
type cliError struct {
|
||||
@ -28,17 +28,10 @@ func (e *cliError) Error() string {
|
||||
return e.err.Error()
|
||||
}
|
||||
|
||||
// Unwrap returns the error stored in e.err so that errors.Is and errors.As
// (as used by ExitCode) can inspect the underlying cause.
func (e *cliError) Unwrap() error {
|
||||
return e.err
|
||||
}
|
||||
|
||||
func ExitCode(err error) int {
|
||||
if err == nil {
|
||||
return 0
|
||||
}
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return 1
|
||||
}
|
||||
var codeErr *cliError
|
||||
if errors.As(err, &codeErr) {
|
||||
return codeErr.code
|
||||
@ -47,10 +40,6 @@ func ExitCode(err error) int {
|
||||
}
|
||||
|
||||
func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
if len(args) == 0 || args[0] == "help" || args[0] == "--help" || args[0] == "-h" {
|
||||
printUsage(stdout)
|
||||
return nil
|
||||
}
|
||||
global := flag.NewFlagSet("discrawl", flag.ContinueOnError)
|
||||
global.SetOutput(io.Discard)
|
||||
configPath := global.String("config", "", "")
|
||||
@ -70,14 +59,10 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
return nil
|
||||
}
|
||||
rest := global.Args()
|
||||
if len(rest) == 0 || rest[0] == "help" || rest[0] == "--help" || rest[0] == "-h" {
|
||||
if len(rest) == 0 || rest[0] == "help" {
|
||||
printUsage(stdout)
|
||||
return nil
|
||||
}
|
||||
if rest[0] == "version" {
|
||||
_, _ = io.WriteString(stdout, version+"\n")
|
||||
return nil
|
||||
}
|
||||
level := slog.LevelInfo
|
||||
if *quiet {
|
||||
level = slog.LevelError
|
||||
@ -98,35 +83,22 @@ func Run(ctx context.Context, args []string, stdout, stderr io.Writer) error {
|
||||
}
|
||||
|
||||
type runtime struct {
|
||||
ctx context.Context
|
||||
configPath string
|
||||
cfg config.Config
|
||||
stdout io.Writer
|
||||
stderr io.Writer
|
||||
json bool
|
||||
plain bool
|
||||
logger *slog.Logger
|
||||
store *store.Store
|
||||
client discordClient
|
||||
syncer syncService
|
||||
dbLockHeld bool
|
||||
lockStarted time.Time
|
||||
openStore func(context.Context, string) (*store.Store, error)
|
||||
newDiscord func(config.Config) (discordClient, error)
|
||||
newSyncer func(syncer.Client, *store.Store, *slog.Logger) syncService
|
||||
newEmbed func(config.EmbeddingsConfig) (embed.Provider, error)
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
func crawlkitEmbeddingConfig(cfg config.EmbeddingsConfig) embed.Config {
|
||||
return embed.Config{
|
||||
Provider: cfg.Provider,
|
||||
Model: cfg.Model,
|
||||
BaseURL: cfg.BaseURL,
|
||||
APIKeyEnv: cfg.APIKeyEnv,
|
||||
RequestTimeout: cfg.RequestTimeout,
|
||||
MaxInputChars: cfg.MaxInputChars,
|
||||
}
|
||||
ctx context.Context
|
||||
configPath string
|
||||
cfg config.Config
|
||||
stdout io.Writer
|
||||
stderr io.Writer
|
||||
json bool
|
||||
plain bool
|
||||
logger *slog.Logger
|
||||
store *store.Store
|
||||
client discordClient
|
||||
syncer syncService
|
||||
openStore func(context.Context, string) (*store.Store, error)
|
||||
newDiscord func(config.Config) (discordClient, error)
|
||||
newSyncer func(syncer.Client, *store.Store, *slog.Logger) syncService
|
||||
newEmbed func(config.EmbeddingsConfig) (embed.Provider, error)
|
||||
now func() time.Time
|
||||
}
|
||||
|
||||
type discordClient interface {
|
||||
@ -148,65 +120,41 @@ type attachmentTextConfigurer interface {
|
||||
|
||||
func (r *runtime) dispatch(rest []string) error {
|
||||
switch rest[0] {
|
||||
case "metadata":
|
||||
return r.runMetadata(rest[1:])
|
||||
case "init":
|
||||
return r.runInit(rest[1:])
|
||||
case "sync":
|
||||
updateMode, err := syncShareUpdateMode(rest[1:])
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
return r.withLocalStoreUpdateLocked(updateMode, true, func() error { return r.runSync(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runSync(rest[1:]) })
|
||||
case "tail":
|
||||
return r.withServicesLocked(true, func() error { return r.runTail(rest[1:]) })
|
||||
return r.withServices(true, func() error { return r.runTail(rest[1:]) })
|
||||
case "wiretap":
|
||||
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
case "tap", "cache-import":
|
||||
return r.withLocalStoreLocked(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
return r.withLocalStoreDefault(false, func() error { return r.runWiretap(rest[1:]) })
|
||||
case "search":
|
||||
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
|
||||
return r.withLocalStoreRead(autoShareUpdate, func() error { return r.runSearch(rest[1:]) })
|
||||
case "tui":
|
||||
if hasHelpArg(rest[1:]) {
|
||||
return r.runTUI(rest[1:])
|
||||
}
|
||||
return r.withLocalStoreReadOnly(func() error { return r.runTUI(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runSearch(rest[1:]) })
|
||||
case "messages":
|
||||
if hasBoolFlag(rest[1:], "--sync") && !hasBoolFlag(rest[1:], "--dm") {
|
||||
return r.withServicesAutoLocked(true, true, true, func() error { return r.runMessages(rest[1:]) })
|
||||
if hasBoolFlag(rest[1:], "--sync") {
|
||||
return r.withServicesAuto(true, true, func() error { return r.runMessages(rest[1:]) })
|
||||
}
|
||||
autoShareUpdate := !hasBoolFlag(rest[1:], "--dm")
|
||||
return r.withLocalStoreRead(autoShareUpdate, func() error { return r.runMessages(rest[1:]) })
|
||||
case "digest":
|
||||
return r.withLocalStoreRead(true, func() error { return r.runDigest(rest[1:]) })
|
||||
case "analytics":
|
||||
return r.runAnalytics(rest[1:])
|
||||
case "dms":
|
||||
return r.withLocalStoreRead(false, func() error { return r.runDirectMessages(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runMessages(rest[1:]) })
|
||||
case "mentions":
|
||||
return r.withLocalStoreRead(true, func() error { return r.runMentions(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runMentions(rest[1:]) })
|
||||
case "embed":
|
||||
return r.withLocalStoreLocked(true, func() error { return r.runEmbed(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runEmbed(rest[1:]) })
|
||||
case "sql":
|
||||
if boolFlagEnabled(rest[1:], "--unsafe") {
|
||||
return r.withLocalStoreLocked(true, func() error { return r.runSQL(rest[1:]) })
|
||||
}
|
||||
return r.withLocalStoreRead(true, func() error { return r.runSQL(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runSQL(rest[1:]) })
|
||||
case "members":
|
||||
return r.withLocalStoreRead(true, func() error { return r.runMembers(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runMembers(rest[1:]) })
|
||||
case "channels":
|
||||
return r.withLocalStoreRead(true, func() error { return r.runChannels(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runChannels(rest[1:]) })
|
||||
case "status":
|
||||
return r.withLocalStoreReadOnly(func() error { return r.runStatus(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runStatus(rest[1:]) })
|
||||
case "report":
|
||||
return r.withLocalStoreRead(true, func() error { return r.runReport(rest[1:]) })
|
||||
return r.withLocalStoreDefault(true, func() error { return r.runReport(rest[1:]) })
|
||||
case "publish":
|
||||
return r.withServicesAutoLocked(false, false, true, func() error { return r.runPublish(rest[1:]) })
|
||||
return r.withServicesAuto(false, false, func() error { return r.runPublish(rest[1:]) })
|
||||
case "subscribe":
|
||||
return r.runSubscribe(rest[1:])
|
||||
case "update":
|
||||
return r.withServicesAutoLocked(false, false, true, func() error { return r.runUpdate(rest[1:]) })
|
||||
return r.withServicesAuto(false, false, func() error { return r.runUpdate(rest[1:]) })
|
||||
case "doctor":
|
||||
return r.runDoctor(rest[1:])
|
||||
default:
|
||||
@ -218,19 +166,7 @@ func (r *runtime) withServices(withDiscord bool, fn func() error) error {
|
||||
return r.withServicesAuto(withDiscord, !withDiscord, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withServicesLocked(withDiscord bool, fn func() error) error {
|
||||
return r.withServicesAutoLocked(withDiscord, !withDiscord, true, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreLocked(autoShareUpdate bool, fn func() error) error {
|
||||
return r.withLocalStoreUpdateLocked(boolShareUpdateMode(autoShareUpdate), true, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreRead(autoShareUpdate bool, fn func() error) error {
|
||||
return r.withLocalStoreReadUpdate(boolShareUpdateMode(autoShareUpdate), fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreReadUpdate(updateMode shareUpdateMode, fn func() error) error {
|
||||
func (r *runtime) withLocalStoreDefault(autoShareUpdate bool, fn func() error) error {
|
||||
cfg, err := config.Load(r.configPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
@ -249,174 +185,24 @@ func (r *runtime) withLocalStoreReadUpdate(updateMode shareUpdateMode, fn func()
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
if r.shouldAutoUpdateShare(updateMode) {
|
||||
if err := r.autoUpdateShareIfLockAvailable(dbPath, updateMode); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return r.openLocalStoreReadOnly(dbPath, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreUpdateLocked(updateMode shareUpdateMode, lockDB bool, fn func() error) error {
|
||||
cfg, err := config.Load(r.configPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return configErr(err)
|
||||
}
|
||||
cfg = config.Default()
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
}
|
||||
if err := config.EnsureRuntimeDirs(cfg); err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
dbPath, err := config.ExpandPath(cfg.DBPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
if lockDB {
|
||||
return r.withSyncLock(func() error {
|
||||
return r.openLocalStore(dbPath, updateMode, fn)
|
||||
})
|
||||
}
|
||||
return r.openLocalStore(dbPath, updateMode, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) shouldAutoUpdateShare(mode shareUpdateMode) bool {
|
||||
return os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" &&
|
||||
r.cfg.ShareEnabled() &&
|
||||
(mode == shareUpdateForce || mode == shareUpdateAuto || (mode == shareUpdateConfigured && r.cfg.Share.AutoUpdate))
|
||||
}
|
||||
|
||||
func (r *runtime) autoUpdateShareIfLockAvailable(dbPath string, updateMode shareUpdateMode) error {
|
||||
locked, err := r.tryWithSyncLock(func() error {
|
||||
storeFactory := r.openStore
|
||||
if storeFactory == nil {
|
||||
storeFactory = store.Open
|
||||
}
|
||||
var openErr error
|
||||
r.store, openErr = storeFactory(r.ctx, dbPath)
|
||||
if openErr != nil {
|
||||
return dbErr(openErr)
|
||||
}
|
||||
defer func() {
|
||||
_ = r.store.Close()
|
||||
r.store = nil
|
||||
}()
|
||||
return r.autoUpdateShare(updateMode)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !locked {
|
||||
r.logger.Info("share update skipped; sync lock is held")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *runtime) openLocalStore(dbPath string, updateMode shareUpdateMode, fn func() error) error {
|
||||
storeFactory := r.openStore
|
||||
if storeFactory == nil {
|
||||
storeFactory = store.Open
|
||||
}
|
||||
var err error
|
||||
r.store, err = storeFactory(r.ctx, dbPath)
|
||||
if err != nil {
|
||||
return dbErr(err)
|
||||
}
|
||||
defer func() { _ = r.store.Close() }()
|
||||
if updateMode != shareUpdateNever && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
|
||||
if err := r.autoUpdateShare(updateMode); err != nil {
|
||||
if autoShareUpdate && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
|
||||
if err := r.autoUpdateShare(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) withLocalStoreReadOnly(fn func() error) error {
|
||||
cfg, err := config.Load(r.configPath)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
return configErr(err)
|
||||
}
|
||||
cfg = config.Default()
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
}
|
||||
dbPath, err := config.ExpandPath(cfg.DBPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
var openErr error
|
||||
r.store, openErr = store.OpenReadOnly(r.ctx, dbPath)
|
||||
if openErr != nil {
|
||||
if errors.Is(openErr, os.ErrNotExist) {
|
||||
r.store = nil
|
||||
return fn()
|
||||
}
|
||||
return dbErr(openErr)
|
||||
}
|
||||
defer func() { _ = r.store.Close() }()
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) openLocalStoreReadOnly(dbPath string, fn func() error) error {
|
||||
var openErr error
|
||||
r.store, openErr = store.OpenReadOnly(r.ctx, dbPath)
|
||||
if openErr != nil {
|
||||
if errors.Is(openErr, os.ErrNotExist) {
|
||||
storeFactory := r.openStore
|
||||
if storeFactory == nil {
|
||||
storeFactory = store.Open
|
||||
}
|
||||
r.store, openErr = storeFactory(r.ctx, dbPath)
|
||||
if openErr == nil {
|
||||
defer func() { _ = r.store.Close() }()
|
||||
return fn()
|
||||
}
|
||||
}
|
||||
if errors.Is(openErr, store.ErrSchemaVersionMismatch) {
|
||||
if err := r.withSyncLock(func() error {
|
||||
storeFactory := r.openStore
|
||||
if storeFactory == nil {
|
||||
storeFactory = store.Open
|
||||
}
|
||||
var migrateErr error
|
||||
r.store, migrateErr = storeFactory(r.ctx, dbPath)
|
||||
if migrateErr != nil {
|
||||
return dbErr(migrateErr)
|
||||
}
|
||||
closeErr := r.store.Close()
|
||||
r.store = nil
|
||||
return closeErr
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
r.store, openErr = store.OpenReadOnly(r.ctx, dbPath)
|
||||
if openErr == nil {
|
||||
defer func() { _ = r.store.Close() }()
|
||||
return fn()
|
||||
}
|
||||
}
|
||||
return dbErr(openErr)
|
||||
}
|
||||
defer func() { _ = r.store.Close() }()
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) withServicesAuto(withDiscord, autoShareUpdate bool, fn func() error) error {
|
||||
return r.withServicesAutoLocked(withDiscord, autoShareUpdate, false, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withServicesAutoLocked(withDiscord, autoShareUpdate, lockDB bool, fn func() error) error {
|
||||
return r.withServicesUpdateLocked(withDiscord, boolShareUpdateMode(autoShareUpdate), lockDB, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) withServicesUpdateLocked(withDiscord bool, updateMode shareUpdateMode, lockDB bool, fn func() error) error {
|
||||
cfg, err := config.Load(r.configPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
@ -429,27 +215,17 @@ func (r *runtime) withServicesUpdateLocked(withDiscord bool, updateMode shareUpd
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
if lockDB {
|
||||
return r.withSyncLock(func() error {
|
||||
return r.openServices(dbPath, withDiscord, updateMode, fn)
|
||||
})
|
||||
}
|
||||
return r.openServices(dbPath, withDiscord, updateMode, fn)
|
||||
}
|
||||
|
||||
func (r *runtime) openServices(dbPath string, withDiscord bool, updateMode shareUpdateMode, fn func() error) error {
|
||||
storeFactory := r.openStore
|
||||
if storeFactory == nil {
|
||||
storeFactory = store.Open
|
||||
}
|
||||
var err error
|
||||
r.store, err = storeFactory(r.ctx, dbPath)
|
||||
if err != nil {
|
||||
return dbErr(err)
|
||||
}
|
||||
defer func() { _ = r.store.Close() }()
|
||||
if updateMode != shareUpdateNever && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
|
||||
if err := r.autoUpdateShare(updateMode); err != nil {
|
||||
if autoShareUpdate && os.Getenv("DISCRAWL_NO_AUTO_UPDATE") != "1" {
|
||||
if err := r.autoUpdateShare(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -493,27 +269,24 @@ func (r *runtime) ensureDiscordServices() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *runtime) autoUpdateShare(mode shareUpdateMode) error {
|
||||
if !r.cfg.ShareEnabled() || (mode == shareUpdateConfigured && !r.cfg.Share.AutoUpdate) {
|
||||
func (r *runtime) autoUpdateShare() error {
|
||||
if !r.cfg.ShareEnabled() || !r.cfg.Share.AutoUpdate {
|
||||
return nil
|
||||
}
|
||||
staleAfter, err := time.ParseDuration(r.cfg.Share.StaleAfter)
|
||||
if err != nil {
|
||||
return configErr(fmt.Errorf("invalid share.stale_after: %w", err))
|
||||
}
|
||||
if mode != shareUpdateForce && !share.NeedsImport(r.ctx, r.store, staleAfter) {
|
||||
if !share.NeedsImport(r.ctx, r.store, staleAfter) {
|
||||
return nil
|
||||
}
|
||||
opts, err := r.shareOptions()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.setSyncLockPhase("share pull")
|
||||
r.logger.Info("share update pulling", "repo_path", opts.RepoPath, "remote", opts.Remote)
|
||||
if err := share.Pull(r.ctx, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
r.setSyncLockPhase("share import")
|
||||
_, _, err = share.ImportIfChanged(r.ctx, r.store, opts)
|
||||
if errors.Is(err, share.ErrNoManifest) {
|
||||
return nil
|
||||
@ -530,6 +303,5 @@ func (r *runtime) shareOptions() (share.Options, error) {
|
||||
RepoPath: repoPath,
|
||||
Remote: r.cfg.Share.Remote,
|
||||
Branch: r.cfg.Share.Branch,
|
||||
Progress: r.shareProgress,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -4,15 +4,12 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"io"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
goruntime "runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -20,13 +17,11 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
discordclient "github.com/openclaw/discrawl/internal/discord"
|
||||
"github.com/openclaw/discrawl/internal/discorddesktop"
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
discordclient "github.com/steipete/discrawl/internal/discord"
|
||||
"github.com/steipete/discrawl/internal/share"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
func TestHelpAndVersion(t *testing.T) {
|
||||
@ -38,196 +33,10 @@ func TestHelpAndVersion(t *testing.T) {
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(context.Background(), []string{"--version"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "0.7.0")
|
||||
require.Contains(t, out.String(), "0.4.1")
|
||||
|
||||
err := Run(context.Background(), []string{"bogus"}, &out, &bytes.Buffer{})
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
require.Equal(t, 1, ExitCode(context.Canceled))
|
||||
require.Equal(t, 7, ExitCode(&cliError{code: 7, err: errors.New("custom")}))
|
||||
}
|
||||
|
||||
func TestCommandValidationEdges(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = dbPath
|
||||
cfg.Discord.TokenSource = "none"
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
s, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, s.Close())
|
||||
|
||||
cases := [][]string{
|
||||
{"--config", cfgPath, "--bogus"},
|
||||
{"--config", cfgPath, "search"},
|
||||
{"--config", cfgPath, "search", "--mode", "bogus", "term"},
|
||||
{"--config", cfgPath, "messages"},
|
||||
{"--config", cfgPath, "messages", "--hours", "-1", "--channel", "general"},
|
||||
{"--config", cfgPath, "messages", "--hours", "1", "--days", "1", "--channel", "general"},
|
||||
{"--config", cfgPath, "messages", "--all", "--last", "1", "--channel", "general"},
|
||||
{"--config", cfgPath, "messages", "--dm", "--sync", "--channel", "alice"},
|
||||
{"--config", cfgPath, "dms", "--hours", "-1"},
|
||||
{"--config", cfgPath, "dms", "--limit", "1", "--last", "1", "--with", "alice"},
|
||||
{"--config", cfgPath, "mentions"},
|
||||
{"--config", cfgPath, "mentions", "--days", "-1", "--target", "u1"},
|
||||
{"--config", cfgPath, "mentions", "--type", "channel", "--target", "u1"},
|
||||
{"--config", cfgPath, "digest", "--since", "-1d"},
|
||||
{"--config", cfgPath, "analytics", "wat"},
|
||||
{"--config", cfgPath, "analytics", "quiet", "extra"},
|
||||
{"--config", cfgPath, "analytics", "trends", "--weeks", "-1"},
|
||||
{"--config", cfgPath, "channels"},
|
||||
{"--config", cfgPath, "channels", "wat"},
|
||||
{"--config", cfgPath, "channels", "show"},
|
||||
{"--config", cfgPath, "status", "extra"},
|
||||
{"--config", cfgPath, "report", "extra"},
|
||||
{"--config", cfgPath, "wiretap", "extra"},
|
||||
{"--config", cfgPath, "wiretap", "--max-file-bytes", "0"},
|
||||
{"--config", cfgPath, "sync", "--source", "bogus"},
|
||||
{"--config", cfgPath, "sync", "--since", "not-time"},
|
||||
{"--config", cfgPath, "sync", "--no-update", "--update", "force"},
|
||||
{"--config", cfgPath, "publish", "--remote", ""},
|
||||
{"--config", cfgPath, "subscribe"},
|
||||
{"--config", cfgPath, "update", "extra"},
|
||||
{"--config", cfgPath, "sql", "--confirm", "select 1"},
|
||||
{"--config", cfgPath, "sql", "--unsafe", "select 1"},
|
||||
{"--config", cfgPath, "members"},
|
||||
{"--config", cfgPath, "members", "wat"},
|
||||
}
|
||||
for _, args := range cases {
|
||||
var stdout, stderr bytes.Buffer
|
||||
err := Run(ctx, args, &stdout, &stderr)
|
||||
require.Error(t, err, args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOutputBranches(t *testing.T) {
|
||||
now := time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC)
|
||||
values := []any{
|
||||
syncRunStats{
|
||||
Source: "both",
|
||||
Discord: &syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5},
|
||||
Wiretap: &discorddesktop.Stats{
|
||||
Path: "/tmp/discord",
|
||||
FilesVisited: 1,
|
||||
FilesScanned: 2,
|
||||
FilesSkipped: 3,
|
||||
FilesUnchanged: 4,
|
||||
CacheFilesFastSkipped: 5,
|
||||
JSONObjects: 6,
|
||||
Guilds: 7,
|
||||
Channels: 8,
|
||||
Messages: 9,
|
||||
DMMessages: 10,
|
||||
DMChannels: 11,
|
||||
GuildMessages: 12,
|
||||
SkippedMessages: 13,
|
||||
SkippedChannels: 14,
|
||||
Checkpoints: 15,
|
||||
FullCache: true,
|
||||
DryRun: true,
|
||||
},
|
||||
},
|
||||
syncer.SyncStats{Guilds: 1, Channels: 2, Threads: 3, Members: 4, Messages: 5},
|
||||
discorddesktop.Stats{Path: "/tmp/discord", FilesVisited: 1, FullCache: true, DryRun: true},
|
||||
store.EmbeddingDrainStats{
|
||||
Processed: 3,
|
||||
Succeeded: 2,
|
||||
Failed: 1,
|
||||
Requeued: 4,
|
||||
RateLimited: true,
|
||||
RemainingBacklog: 5,
|
||||
Provider: "openai",
|
||||
Model: "model",
|
||||
InputVersion: "v1",
|
||||
},
|
||||
[]store.DirectMessageConversationRow{{
|
||||
ChannelID: "c1",
|
||||
Name: "Alice",
|
||||
MessageCount: 2,
|
||||
AuthorCount: 1,
|
||||
FirstMessageAt: now.Add(-time.Hour),
|
||||
LastMessageAt: now,
|
||||
}},
|
||||
store.MemberProfile{
|
||||
Member: store.MemberRow{
|
||||
GuildID: "g1",
|
||||
UserID: "u1",
|
||||
Username: "peter",
|
||||
DisplayName: "Peter",
|
||||
JoinedAt: now,
|
||||
XHandle: "steipete",
|
||||
GitHubLogin: "steipete",
|
||||
Website: "https://steipete.me",
|
||||
Pronouns: "he/him",
|
||||
Location: "Vienna",
|
||||
Bio: "Maintainer",
|
||||
URLs: []string{"https://example.com"},
|
||||
},
|
||||
MessageCount: 1,
|
||||
FirstMessageAt: now.Add(-time.Hour),
|
||||
LastMessageAt: now,
|
||||
RecentMessages: []store.MessageRow{{ChannelName: "general", CreatedAt: now, Content: "hello"}},
|
||||
},
|
||||
report.Digest{
|
||||
Since: now.Add(-24 * time.Hour),
|
||||
Until: now,
|
||||
WindowLabel: "1d",
|
||||
Channels: []report.ChannelDigest{{
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
Kind: "text",
|
||||
GuildID: "g1",
|
||||
Messages: 3,
|
||||
Replies: 1,
|
||||
ActiveAuthors: 2,
|
||||
TopPosters: []report.RankedCount{{Name: "Peter", Count: 2}},
|
||||
TopMentions: []report.RankedCount{{Count: 1}},
|
||||
}},
|
||||
Totals: report.DigestTotals{Messages: 3, Replies: 1, Channels: 1, ActiveAuthors: 2},
|
||||
},
|
||||
report.Quiet{
|
||||
Since: now.Add(-24 * time.Hour),
|
||||
Until: now,
|
||||
Channels: []report.QuietChannel{{
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
Kind: "text",
|
||||
LastMessage: "",
|
||||
DaysSilent: -1,
|
||||
}},
|
||||
Totals: report.QuietTotals{Channels: 1},
|
||||
},
|
||||
report.Trends{
|
||||
Since: now.AddDate(0, 0, -14),
|
||||
Until: now,
|
||||
Weeks: 2,
|
||||
Rows: []report.TrendsRow{{
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
Kind: "text",
|
||||
GuildID: "g1",
|
||||
Weekly: []report.WeeklyCount{
|
||||
{WeekStart: now.AddDate(0, 0, -14), Messages: 1},
|
||||
{WeekStart: now.AddDate(0, 0, -7), Messages: 2},
|
||||
},
|
||||
}},
|
||||
},
|
||||
map[string]any{"b": 2, "a": 1},
|
||||
}
|
||||
for _, value := range values {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, printHuman(&out, value))
|
||||
require.NotEmpty(t, out.String())
|
||||
}
|
||||
|
||||
var plain bytes.Buffer
|
||||
require.NoError(t, printPlain(&plain, report.Quiet{Channels: []report.QuietChannel{{ChannelID: "c1", ChannelName: "general", Kind: "text", GuildID: "g1", LastMessage: "now", DaysSilent: 0}}}))
|
||||
require.NoError(t, printPlain(&plain, report.Trends{Rows: []report.TrendsRow{{GuildID: "g1", ChannelID: "c1", ChannelName: "general", Kind: "text", Weekly: []report.WeeklyCount{{WeekStart: now, Messages: 2}}}}}))
|
||||
require.Error(t, printPlain(io.Discard, struct{}{}))
|
||||
require.Error(t, printHuman(io.Discard, struct{}{}))
|
||||
require.Equal(t, "this is a profile field with a very l...", trimForTable("this is a profile field with a very long text value"))
|
||||
}
|
||||
|
||||
func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
@ -266,21 +75,6 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
NormalizedContent: "panic locked database",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertGuild(ctx, store.GuildRecord{ID: "g2", Name: "Other Guild", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g2", Kind: "text", Name: "random", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
|
||||
ID: "m-other",
|
||||
GuildID: "g2",
|
||||
ChannelID: "c2",
|
||||
ChannelName: "random",
|
||||
AuthorID: "u2",
|
||||
AuthorName: "Outside",
|
||||
MessageType: 0,
|
||||
CreatedAt: time.Now().UTC().Add(-time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "outside default guild",
|
||||
NormalizedContent: "outside default guild",
|
||||
RawJSON: `{}`,
|
||||
}))
|
||||
require.NoError(t, s.UpsertMessage(ctx, store.MessageRecord{
|
||||
ID: "m2",
|
||||
GuildID: "g1",
|
||||
@ -325,7 +119,6 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
tests := [][]string{
|
||||
{"--config", cfgPath, "status"},
|
||||
{"--config", cfgPath, "search", "panic"},
|
||||
{"--config", cfgPath, "search", "panic", "--limit", "1"},
|
||||
{"--config", cfgPath, "search", "stack"},
|
||||
{"--config", cfgPath, "search", "--include-empty", "Peter"},
|
||||
{"--config", cfgPath, "messages", "--channel", "general", "--days", "7", "--all"},
|
||||
@ -343,100 +136,6 @@ func TestStatusSearchSQLAndListings(t *testing.T) {
|
||||
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
|
||||
require.NotEmpty(t, out.String())
|
||||
}
|
||||
|
||||
for _, args := range [][]string{
|
||||
{"--config", cfgPath, "metadata", "--json"},
|
||||
{"--config", cfgPath, "status", "--json"},
|
||||
} {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, args, &out, &bytes.Buffer{}))
|
||||
var payload map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
|
||||
require.NotEmpty(t, payload)
|
||||
}
|
||||
|
||||
before, err := os.ReadFile(dbPath)
|
||||
require.NoError(t, err)
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "tui", "--limit", "5"}, &out, &bytes.Buffer{}))
|
||||
var rows []map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &rows))
|
||||
require.NotEmpty(t, rows)
|
||||
require.Equal(t, "panic locked database", rows[0]["title"])
|
||||
require.Equal(t, "discord", rows[0]["source"])
|
||||
require.Equal(t, "message", rows[0]["kind"])
|
||||
require.Equal(t, "Guild", rows[0]["scope"])
|
||||
require.Equal(t, "general", rows[0]["container"])
|
||||
require.Equal(t, "https://discord.com/channels/g1/c1/m1", rows[0]["url"])
|
||||
after, err := os.ReadFile(dbPath)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, before, after, "tui --json should not mutate the database")
|
||||
}
|
||||
|
||||
func TestTUIHelpReturnsUsage(t *testing.T) {
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
|
||||
require.NoError(t, Run(context.Background(), []string{"tui", "--help"}, &stdout, &stderr))
|
||||
require.Contains(t, stdout.String(), "Usage of tui:")
|
||||
require.Contains(t, stdout.String(), "-limit")
|
||||
require.Contains(t, stdout.String(), "right-click")
|
||||
require.Contains(t, stdout.String(), "# jump")
|
||||
require.Empty(t, stderr.String())
|
||||
}
|
||||
|
||||
func TestControlStatusIncludesShareAndFileSizes(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
require.NoError(t, os.WriteFile(dbPath, []byte("db"), 0o600))
|
||||
require.NoError(t, os.WriteFile(dbPath+"-wal", []byte("wal"), 0o600))
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = dbPath
|
||||
cfg.Share.Remote = "https://github.com/openclaw/discrawl-share.git"
|
||||
cfg.Share.RepoPath = filepath.Join(dir, "share")
|
||||
status := store.Status{
|
||||
DBPath: dbPath,
|
||||
MessageCount: 5,
|
||||
ChannelCount: 2,
|
||||
}
|
||||
|
||||
out := controlStatus(filepath.Join(dir, "config.toml"), cfg, status, true)
|
||||
require.Equal(t, int64(2), out.DatabaseBytes)
|
||||
require.Equal(t, int64(3), out.WALBytes)
|
||||
require.Zero(t, fileSize(filepath.Join(dir, "missing.db")))
|
||||
require.NotNil(t, out.Share)
|
||||
require.True(t, out.Share.Enabled)
|
||||
require.True(t, out.Share.NeedsUpdate)
|
||||
require.Contains(t, out.Summary, "5 messages")
|
||||
}
|
||||
|
||||
func TestFormattingAndTUISourceBranches(t *testing.T) {
|
||||
require.Equal(t, "-", formatDaysSilent(-1))
|
||||
require.Equal(t, "4", formatDaysSilent(4))
|
||||
require.Equal(t, "0", formatWindowDuration(0))
|
||||
require.Equal(t, "2d", formatWindowDuration(48*time.Hour))
|
||||
require.Equal(t, "3h", formatWindowDuration(3*time.Hour))
|
||||
require.Equal(t, "1h30m0s", formatWindowDuration(90*time.Minute))
|
||||
require.Equal(t, 6*time.Hour, mustDuration("bogus"))
|
||||
require.Equal(t, 15*time.Minute, mustDuration("15m"))
|
||||
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = "/tmp/discrawl.db"
|
||||
r := &runtime{cfg: cfg}
|
||||
require.Equal(t, "local", r.archiveSourceKind())
|
||||
require.Equal(t, cfg.DBPath, r.archiveSourceLocation())
|
||||
guilds, err := r.resolveTUIGuilds(false, "", "")
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, guilds)
|
||||
|
||||
r.cfg.DefaultGuildID = "guild-one"
|
||||
guilds, err = r.resolveTUIGuilds(false, "", "")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, []string{"guild-one"}, guilds)
|
||||
|
||||
r.cfg.Share.Remote = "https://github.com/openclaw/discrawl-share.git"
|
||||
require.Equal(t, "remote", r.archiveSourceKind())
|
||||
require.Equal(t, r.cfg.Share.Remote, r.archiveSourceLocation())
|
||||
}
|
||||
|
||||
func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
|
||||
@ -463,97 +162,6 @@ func TestWiretapImportsDesktopDirectMessages(t *testing.T) {
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "launch"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
require.Contains(t, out.String(), "@me")
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "dms"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "Alice")
|
||||
require.Contains(t, out.String(), "111111111111111111")
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "dms", "--with", "Alice", "--last", "1"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
require.Contains(t, out.String(), "@me")
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "--dm", "launch"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
|
||||
out.Reset()
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "messages", "--dm", "--channel", "Alice", "--last", "1"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "secret DM launch plan")
|
||||
}
|
||||
|
||||
func TestDiscordTUIRowsIncludePaneMetadata(t *testing.T) {
|
||||
rows := discordTUIRows([]store.MessageRow{{
|
||||
MessageID: "m1",
|
||||
GuildID: "@me",
|
||||
GuildName: "Discord Direct Messages",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "Vincent K",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Peter",
|
||||
Content: "hello from desktop",
|
||||
DisplayContent: "hello from Vincent",
|
||||
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
|
||||
ReplyToMessage: "m0",
|
||||
HasAttachments: true,
|
||||
AttachmentNames: "trace.txt",
|
||||
AttachmentText: "stack trace line one",
|
||||
Pinned: true,
|
||||
}})
|
||||
require.Len(t, rows, 1)
|
||||
require.Equal(t, "hello from Vincent", rows[0].Title)
|
||||
require.Contains(t, rows[0].Detail, "hello from Vincent")
|
||||
require.Contains(t, rows[0].Detail, "Attachments")
|
||||
require.Contains(t, rows[0].Detail, "stack trace line one")
|
||||
require.Equal(t, "hello from Vincent", rows[0].Text)
|
||||
require.Equal(t, "Direct messages", rows[0].Scope)
|
||||
require.Equal(t, "Vincent K", rows[0].Container)
|
||||
require.Contains(t, rows[0].Tags, "dm")
|
||||
require.Equal(t, "true", rows[0].Fields["attachments"])
|
||||
require.Equal(t, "trace.txt", rows[0].Fields["attachment_names"])
|
||||
require.Equal(t, "true", rows[0].Fields["pinned"])
|
||||
require.Equal(t, "m0", rows[0].Fields["reply_to"])
|
||||
require.Equal(t, "@me", rows[0].Fields["guild_id"])
|
||||
|
||||
rows = discordTUIRows([]store.MessageRow{{
|
||||
MessageID: "m2",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c2",
|
||||
AuthorID: "439223656200273932",
|
||||
Content: "desktop-only author",
|
||||
CreatedAt: time.Date(2026, 5, 2, 12, 0, 0, 0, time.UTC),
|
||||
Source: "discord_desktop",
|
||||
}})
|
||||
require.Equal(t, "user:439223...3932", rows[0].Author)
|
||||
require.Equal(t, "DM c2", discordContainerLabel(store.MessageRow{GuildID: "@me", ChannelID: "c2"}))
|
||||
require.Contains(t, rows[0].Tags, "discord_desktop")
|
||||
}
|
||||
|
||||
func TestParseMessageWindow(t *testing.T) {
|
||||
rt := &runtime{now: func() time.Time {
|
||||
return time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)
|
||||
}}
|
||||
|
||||
since, before, err := rt.parseMessageWindow(6, 0, "", "")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, time.Date(2026, 4, 24, 6, 0, 0, 0, time.UTC), since)
|
||||
require.True(t, before.IsZero())
|
||||
|
||||
since, before, err = rt.parseMessageWindow(0, 2, "", "")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, time.Date(2026, 4, 22, 12, 0, 0, 0, time.UTC), since)
|
||||
require.True(t, before.IsZero())
|
||||
|
||||
since, before, err = rt.parseMessageWindow(0, 0, "2026-04-20T10:00:00Z", "2026-04-21T10:00:00Z")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, time.Date(2026, 4, 20, 10, 0, 0, 0, time.UTC), since)
|
||||
require.Equal(t, time.Date(2026, 4, 21, 10, 0, 0, 0, time.UTC), before)
|
||||
|
||||
_, _, err = rt.parseMessageWindow(0, 0, "bad", "")
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
_, _, err = rt.parseMessageWindow(0, 0, "", "bad")
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
}
|
||||
|
||||
func TestWiretapAndSearchWorkWithoutConfig(t *testing.T) {
|
||||
@ -789,7 +397,7 @@ func TestShareCommandsRoundTripEmbeddings(t *testing.T) {
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
publisher := seedCLIStore(t, cfg.DBPath)
|
||||
require.NoError(t, insertCLIEmbedding(ctx, publisher, "m100", "openai_compatible", "local-model", []float32{1, 0}))
|
||||
require.NoError(t, insertCLIEmbedding(ctx, publisher, "m1", "openai_compatible", "local-model", []float32{1, 0}))
|
||||
require.NoError(t, publisher.Close())
|
||||
require.NoError(t, os.MkdirAll(cfg.Share.RepoPath, 0o755))
|
||||
runGit(t, cfg.Share.RepoPath, "init")
|
||||
@ -947,7 +555,7 @@ func TestShareUpdateImportsNewRemoteSnapshot(t *testing.T) {
|
||||
require.Contains(t, out.String(), "newer git snapshot arrived")
|
||||
}
|
||||
|
||||
func TestSyncSkipsGitShareByDefaultAndCanImportBeforeLiveDiscord(t *testing.T) {
|
||||
func TestSyncImportsGitShareBeforeLiveDiscord(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
remoteRepo := filepath.Join(dir, "remote.git")
|
||||
@ -968,8 +576,6 @@ func TestSyncSkipsGitShareByDefaultAndCanImportBeforeLiveDiscord(t *testing.T) {
|
||||
cfg.Share.RepoPath = filepath.Join(dir, "reader-share")
|
||||
cfg.Share.AutoUpdate = true
|
||||
cfg.Share.StaleAfter = "15m"
|
||||
cfg.Desktop.Path = filepath.Join(dir, "empty-discord")
|
||||
require.NoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
hybrid := &hybridSyncService{}
|
||||
@ -990,127 +596,21 @@ func TestSyncSkipsGitShareByDefaultAndCanImportBeforeLiveDiscord(t *testing.T) {
|
||||
}
|
||||
|
||||
require.NoError(t, rt.dispatch([]string{"sync", "--all"}))
|
||||
require.False(t, hybrid.sawGitMessage)
|
||||
require.True(t, hybrid.sawGitMessage)
|
||||
|
||||
reader, err := store.Open(ctx, cfg.DBPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = reader.Close() }()
|
||||
rows, err := reader.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
|
||||
require.NoError(t, err)
|
||||
contents := make([]string, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
contents = append(contents, row.Content)
|
||||
}
|
||||
require.NotContains(t, contents, "automatic updates work")
|
||||
require.Contains(t, contents, "live discord filled the delta")
|
||||
require.NoError(t, reader.Close())
|
||||
|
||||
hybrid.sawGitMessage = false
|
||||
require.NoError(t, rt.dispatch([]string{"sync", "--all", "--update=auto"}))
|
||||
require.True(t, hybrid.sawGitMessage)
|
||||
|
||||
reader, err = store.Open(ctx, cfg.DBPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = reader.Close() }()
|
||||
rows, err = reader.ListMessages(ctx, store.MessageListOptions{Channel: "general", IncludeEmpty: true})
|
||||
require.NoError(t, err)
|
||||
contents = contents[:0]
|
||||
for _, row := range rows {
|
||||
contents = append(contents, row.Content)
|
||||
}
|
||||
require.Contains(t, contents, "automatic updates work")
|
||||
require.Contains(t, contents, "live discord filled the delta")
|
||||
}
|
||||
|
||||
func TestSyncLockSerializesConcurrentRuns(t *testing.T) {
|
||||
if goruntime.GOOS == "windows" {
|
||||
t.Skip("sync lock is currently a no-op on Windows")
|
||||
}
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = filepath.Join(dir, "discrawl.db")
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
rt := &runtime{
|
||||
ctx: ctx,
|
||||
configPath: cfgPath,
|
||||
cfg: cfg,
|
||||
}
|
||||
firstRelease, err := acquireSyncLock(ctx, filepath.Join(dir, ".discrawl-sync.lock"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = firstRelease() }()
|
||||
|
||||
waitCtx, cancel := context.WithTimeout(ctx, 25*time.Millisecond)
|
||||
defer cancel()
|
||||
rt.ctx = waitCtx
|
||||
err = rt.withSyncLock(func() error { return nil })
|
||||
require.ErrorIs(t, err, context.DeadlineExceeded)
|
||||
|
||||
waitCtx, cancel = context.WithTimeout(ctx, 25*time.Millisecond)
|
||||
defer cancel()
|
||||
rt.ctx = waitCtx
|
||||
err = rt.dispatch([]string{"update"})
|
||||
require.ErrorIs(t, err, context.DeadlineExceeded)
|
||||
}
|
||||
|
||||
func TestReadCommandsDoNotWaitForSyncLock(t *testing.T) {
|
||||
if goruntime.GOOS == "windows" {
|
||||
t.Skip("sync lock timing is flaky on Windows")
|
||||
}
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = filepath.Join(dir, "discrawl.db")
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
s := seedCLIStore(t, cfg.DBPath)
|
||||
require.NoError(t, s.Close())
|
||||
|
||||
firstRelease, err := acquireSyncLock(ctx, filepath.Join(dir, ".discrawl-sync.lock"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = firstRelease() }()
|
||||
|
||||
for _, args := range [][]string{
|
||||
{"--config", cfgPath, "search", "automatic"},
|
||||
{"--config", cfgPath, "messages", "--channel", "general", "--last", "1"},
|
||||
{"--config", cfgPath, "sql", "select count(*) as total from messages"},
|
||||
} {
|
||||
runCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
|
||||
var out bytes.Buffer
|
||||
err := Run(runCtx, args, &out, &bytes.Buffer{})
|
||||
cancel()
|
||||
require.NoError(t, err, args)
|
||||
require.NotEmpty(t, out.String(), args)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadCommandsMigrateOlderLocalStore(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = filepath.Join(dir, "discrawl.db")
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
s := seedCLIStore(t, cfg.DBPath)
|
||||
_, err := s.DB().ExecContext(ctx, `pragma user_version = 1`)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, s.Close())
|
||||
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "search", "automatic"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "automatic updates work")
|
||||
|
||||
reader, err := store.OpenReadOnly(ctx, cfg.DBPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = reader.Close() }()
|
||||
var version int
|
||||
require.NoError(t, reader.DB().QueryRowContext(ctx, `pragma user_version`).Scan(&version))
|
||||
require.Equal(t, 2, version)
|
||||
}
|
||||
|
||||
func seedCLIStore(t *testing.T, path string) *store.Store {
|
||||
t.Helper()
|
||||
ctx := context.Background()
|
||||
@ -1150,7 +650,7 @@ func publishSnapshot(t *testing.T, ctx context.Context, s *store.Store, opts sha
|
||||
func runGit(t *testing.T, dir string, args ...string) {
|
||||
t.Helper()
|
||||
// #nosec G204 -- fixed git argv in test setup.
|
||||
cmd := exec.CommandContext(t.Context(), "git", args...)
|
||||
cmd := exec.Command("git", args...)
|
||||
cmd.Dir = dir
|
||||
out, err := cmd.CombinedOutput()
|
||||
require.NoError(t, err, string(out))
|
||||
@ -1663,16 +1163,14 @@ func TestRuntimeInitSyncTailAndDoctor(t *testing.T) {
|
||||
}
|
||||
|
||||
rt := newRuntime()
|
||||
require.NoError(t, rt.runInit([]string{"--db", dbPath, "--with-embeddings", "--guild", "g2"}))
|
||||
require.NoError(t, rt.runInit([]string{"--db", dbPath, "--with-embeddings", "--guild", "g2", "--account", "atlas"}))
|
||||
|
||||
cfg, err := config.Load(cfgPath)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, []string{"g1", "g2"}, cfg.GuildIDs)
|
||||
require.Equal(t, "g2", cfg.DefaultGuildID)
|
||||
require.Equal(t, "atlas", cfg.Discord.Account)
|
||||
require.True(t, cfg.Search.Embeddings.Enabled)
|
||||
cfg.Desktop.Path = filepath.Join(dir, "empty-discord")
|
||||
require.NoError(t, os.MkdirAll(cfg.Desktop.Path, 0o755))
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
rt = newRuntime()
|
||||
require.NoError(t, rt.withServices(true, func() error { return rt.runSync([]string{"--guilds", "g2"}) }))
|
||||
@ -1705,42 +1203,6 @@ func TestRuntimeInitSyncTailAndDoctor(t *testing.T) {
|
||||
require.Contains(t, out.String(), "discord_auth=ok")
|
||||
}
|
||||
|
||||
func TestSyncModeDefaults(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
full bool
|
||||
allChannels bool
|
||||
since string
|
||||
channels string
|
||||
defaultLatest bool
|
||||
latestOnly bool
|
||||
skipMembers bool
|
||||
explicitLatest bool
|
||||
explicitSkip bool
|
||||
}{
|
||||
{name: "routine", defaultLatest: true, latestOnly: true, skipMembers: true},
|
||||
{name: "all channels", allChannels: true},
|
||||
{name: "full", full: true},
|
||||
{name: "since", since: "2026-04-27T20:00:00Z"},
|
||||
{name: "channels", channels: "c1"},
|
||||
{name: "explicit latest", allChannels: true, explicitLatest: true, latestOnly: true},
|
||||
{name: "explicit skip members", allChannels: true, explicitSkip: true, skipMembers: true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
defaultLatest := defaultLatestSyncMode(tt.full, tt.allChannels, tt.since, tt.channels)
|
||||
require.Equal(t, tt.defaultLatest, defaultLatest)
|
||||
require.Equal(t, tt.latestOnly, syncLatestOnly(tt.explicitLatest, defaultLatest))
|
||||
require.Equal(t, tt.skipMembers, syncSkipsMembers(tt.explicitSkip, defaultLatest))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoctorChecksEnabledLocalEmbeddingProvider(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -1856,7 +1318,7 @@ func TestRuntimeConfiguresAttachmentTextOnSyncer(t *testing.T) {
|
||||
require.NoError(t, rt.withServices(true, func() error { return nil }))
|
||||
require.True(t, fakeSync.attachmentTextEnabled)
|
||||
|
||||
cfg.Sync.AttachmentText = new(false)
|
||||
cfg.Sync.AttachmentText = ptrBool(false)
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
require.NoError(t, rt.withServices(true, func() error { return nil }))
|
||||
require.False(t, fakeSync.attachmentTextEnabled)
|
||||
@ -1929,13 +1391,6 @@ func TestCommandUsageBranches(t *testing.T) {
|
||||
{[]string{"--config", cfgPath, "sql", "--confirm", "select 1"}, "--confirm requires --unsafe"},
|
||||
{[]string{"--config", cfgPath, "sql", "--unsafe", "delete from messages"}, "--unsafe requires --confirm"},
|
||||
{[]string{"--config", cfgPath, "search"}, "search requires a query"},
|
||||
{[]string{"--config", cfgPath, "search", "--dm", "--guild", "g1", "panic"}, "use either --dm or --guild/--guilds"},
|
||||
{[]string{"--config", cfgPath, "messages", "--dm", "--guild", "g1"}, "use either --dm or --guild/--guilds"},
|
||||
{[]string{"--config", cfgPath, "messages", "--dm", "--sync"}, "messages --sync is not supported with --dm"},
|
||||
{[]string{"--config", cfgPath, "dms", "extra"}, "dms takes flags only"},
|
||||
{[]string{"--config", cfgPath, "wiretap", "extra"}, "wiretap takes flags only"},
|
||||
{[]string{"--config", cfgPath, "wiretap", "--max-file-bytes", "0"}, "--max-file-bytes must be positive"},
|
||||
{[]string{"--config", cfgPath, "wiretap", "--watch-every", "1ms"}, "--watch-every must be at least 1s"},
|
||||
{[]string{"--config", cfgPath, "members"}, "members requires a subcommand"},
|
||||
{[]string{"--config", cfgPath, "members", "search"}, "members search requires a query"},
|
||||
{[]string{"--config", cfgPath, "members", "bogus"}, `unknown members subcommand "bogus"`},
|
||||
@ -1948,8 +1403,7 @@ func TestCommandUsageBranches(t *testing.T) {
|
||||
{[]string{"--config", cfgPath, "embed", "--batch-size", "0"}, "--batch-size must be positive"},
|
||||
{[]string{"--config", cfgPath, "publish", "extra"}, "publish takes no positional arguments"},
|
||||
{[]string{"--config", cfgPath, "update", "extra"}, "update takes no positional arguments"},
|
||||
{[]string{"--config", cfgPath, "subscribe"}, "subscribe requires one remote"},
|
||||
{[]string{"--config", cfgPath, "subscribe", "one", "two"}, "subscribe requires one remote"},
|
||||
{[]string{"--config", cfgPath, "subscribe", "one", "two"}, "subscribe takes at most one remote"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
err := Run(ctx, tc.args, &bytes.Buffer{}, &bytes.Buffer{})
|
||||
@ -1963,29 +1417,11 @@ func TestHelpers(t *testing.T) {
|
||||
|
||||
require.Equal(t, []string{"a", "b"}, csvList("a,b,a"))
|
||||
require.Equal(t, "x", (&cliError{code: 2, err: assertErr("x")}).Error())
|
||||
mode, err := syncShareUpdateMode([]string{"--all"})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, shareUpdateNever, mode)
|
||||
mode, err = syncShareUpdateMode([]string{"--update=auto"})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, shareUpdateAuto, mode)
|
||||
mode, err = syncShareUpdateMode([]string{"--update", "force"})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, shareUpdateForce, mode)
|
||||
_, err = syncShareUpdateMode([]string{"--update"})
|
||||
require.Error(t, err)
|
||||
require.Equal(t, 2, ExitCode(usageErr(assertErr("x"))))
|
||||
require.Equal(t, 4, ExitCode(authErr(assertErr("x"))))
|
||||
require.Equal(t, 5, ExitCode(dbErr(assertErr("x"))))
|
||||
require.Equal(t, 3, ExitCode(configErr(assertErr("x"))))
|
||||
require.Equal(t, 1, ExitCode(assertErr("x")))
|
||||
require.True(t, hybridSemanticUnavailable(store.ErrNoCompatibleEmbeddings))
|
||||
require.True(t, hybridSemanticUnavailable(assertErr("semantic query embedding missing")))
|
||||
require.False(t, hybridSemanticUnavailable(assertErr("other")))
|
||||
opts, err := shareOptionsFromFlags("~/share", "git@example.com:org/archive.git", "")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "git@example.com:org/archive.git", opts.Remote)
|
||||
require.Equal(t, "main", opts.Branch)
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, printHuman(&out, syncer.SyncStats{Guilds: 1}))
|
||||
require.Contains(t, out.String(), "guilds=1")
|
||||
@ -2001,6 +1437,10 @@ func discardLogger() *slog.Logger {
|
||||
return slog.New(slog.DiscardHandler)
|
||||
}
|
||||
|
||||
// ptrBool returns a pointer to a newly allocated bool holding value, for
// filling optional *bool fields from a literal.
func ptrBool(value bool) *bool {
	out := new(bool)
	*out = value
	return out
}
|
||||
|
||||
func TestRuntimeHelpersAndSubcommands(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -2015,49 +1455,7 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
|
||||
s, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "dm1", GuildID: store.DirectMessageGuildID, Kind: "dm", Name: "Alice", RawJSON: `{}`}))
|
||||
require.NoError(t, s.UpsertMember(ctx, store.MemberRecord{GuildID: "g1", UserID: "u1", Username: "peter", RoleIDsJSON: `[]`, RawJSON: `{}`}))
|
||||
base := time.Date(2026, 3, 8, 10, 0, 0, 0, time.UTC)
|
||||
require.NoError(t, s.UpsertMessages(ctx, []store.MessageMutation{
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "peter",
|
||||
CreatedAt: base.Format(time.RFC3339Nano),
|
||||
Content: "hello <@u1> in <#c1>",
|
||||
NormalizedContent: "hello <@u1> in <#c1>",
|
||||
RawJSON: `{"author":{"username":"peter"}}`,
|
||||
},
|
||||
Mentions: []store.MentionEventRecord{{
|
||||
MessageID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
AuthorID: "u1",
|
||||
TargetType: "user",
|
||||
TargetID: "u1",
|
||||
TargetName: "peter",
|
||||
EventAt: base.Format(time.RFC3339Nano),
|
||||
}},
|
||||
},
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "dm-msg",
|
||||
GuildID: store.DirectMessageGuildID,
|
||||
ChannelID: "dm1",
|
||||
ChannelName: "Alice",
|
||||
AuthorID: "u2",
|
||||
AuthorName: "Alice",
|
||||
CreatedAt: base.Add(time.Minute).Format(time.RFC3339Nano),
|
||||
Content: "private hello",
|
||||
NormalizedContent: "private hello",
|
||||
RawJSON: `{"source":"discord_desktop"}`,
|
||||
},
|
||||
},
|
||||
}))
|
||||
require.NoError(t, s.Close())
|
||||
|
||||
rt := &runtime{
|
||||
@ -2077,57 +1475,15 @@ func TestRuntimeHelpersAndSubcommands(t *testing.T) {
|
||||
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--hours", "6", "--last", "1"}))
|
||||
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all"}))
|
||||
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--days", "7", "--all", "--include-empty"}))
|
||||
require.NoError(t, rt.runMessages([]string{"--channel", "#general", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--limit", "1"}))
|
||||
require.NoError(t, rt.runMessages([]string{"--dm", "--channel", "Alice", "--last", "1"}))
|
||||
require.NoError(t, rt.runDirectMessages([]string{"--list"}))
|
||||
require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--search", "private", "--limit", "1"}))
|
||||
require.NoError(t, rt.runDirectMessages([]string{"--with", "Alice", "--since", "2026-03-08T00:00:00Z", "--before", "2026-03-09T00:00:00Z", "--all"}))
|
||||
require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--target", "u2"}))
|
||||
require.NoError(t, rt.runMentions([]string{"--channel", "#general", "--days", "7", "--type", "user"}))
|
||||
require.NoError(t, rt.runDigest([]string{"--since", "12h", "--channel", "general", "--top-n", "2"}))
|
||||
require.NoError(t, rt.runReport([]string{"--readme", filepath.Join(dir, "README.md")}))
|
||||
require.NoError(t, rt.runSearch([]string{"--include-empty", "Peter"}))
|
||||
require.NoError(t, rt.runChannels([]string{"show", "c1"}))
|
||||
require.NoError(t, rt.runChannels([]string{"list"}))
|
||||
require.NoError(t, rt.runStatus(nil))
|
||||
require.NoError(t, rt.runAnalytics([]string{}))
|
||||
require.NoError(t, rt.runTUI([]string{"--json", "--limit", "1", "--include-empty"}))
|
||||
require.NoError(t, rt.runAnalytics([]string{"quiet", "--since", "1d"}))
|
||||
require.NoError(t, rt.runAnalytics([]string{"trends", "--weeks", "1", "--channel", "general"}))
|
||||
return nil
|
||||
}))
|
||||
}
|
||||
|
||||
func TestRunInitWritesDiscoveredGuildConfig(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
t.Setenv(config.DefaultTokenEnv, "env-token")
|
||||
|
||||
fakeSync := &fakeSyncService{discovered: []*discordgo.UserGuild{{ID: "g1"}, {ID: "g2"}}}
|
||||
rt := &runtime{
|
||||
ctx: ctx,
|
||||
configPath: cfgPath,
|
||||
stdout: &bytes.Buffer{},
|
||||
stderr: &bytes.Buffer{},
|
||||
logger: discardLogger(),
|
||||
newDiscord: func(config.Config) (discordClient, error) { return &fakeDiscordClient{}, nil },
|
||||
newSyncer: func(syncer.Client, *store.Store, *slog.Logger) syncService {
|
||||
return fakeSync
|
||||
},
|
||||
}
|
||||
|
||||
require.NoError(t, rt.runInit([]string{"--db", dbPath, "--guild", "g2", "--with-embeddings"}))
|
||||
cfg, err := config.Load(cfgPath)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, dbPath, cfg.DBPath)
|
||||
require.Equal(t, []string{"g1", "g2"}, cfg.GuildIDs)
|
||||
require.Equal(t, "g2", cfg.DefaultGuildID)
|
||||
require.True(t, cfg.Search.Embeddings.Enabled)
|
||||
require.Contains(t, rt.stdout.(*bytes.Buffer).String(), "g2")
|
||||
}
|
||||
|
||||
func TestRunMembersShowUsesDefaultGuildForAmbiguousQuery(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@ -2302,17 +1658,6 @@ func TestRunMentionsValidation(t *testing.T) {
|
||||
rt := &runtime{stderr: &bytes.Buffer{}}
|
||||
rt.now = func() time.Time { return time.Date(2026, 3, 8, 12, 0, 0, 0, time.UTC) }
|
||||
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"extra"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "-1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--days", "-1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "1", "--days", "1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--hours", "1", "--since", "2026-03-01T00:00:00Z"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--limit", "-1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--last", "-1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--all", "--last", "1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--limit", "1", "--last", "1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--since", "bad"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runDirectMessages([]string{"--before", "bad"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "-1", "--channel", "general"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "1", "--days", "1", "--channel", "general"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--hours", "1", "--since", "2026-03-01T00:00:00Z", "--channel", "general"})))
|
||||
@ -2411,8 +1756,6 @@ func TestCommandUsageErrors(t *testing.T) {
|
||||
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--days", "-1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runMessages([]string{"--days", "1", "--since", "2026-03-01T00:00:00Z"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--all", "--guild", "g1"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--update", "bogus"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runSync([]string{"--update=force", "--no-update"})))
|
||||
require.Equal(t, 2, ExitCode(rt.runChannels(nil)))
|
||||
require.Equal(t, 2, ExitCode(rt.runStatus([]string{"extra"})))
|
||||
require.NoError(t, (&runtime{stdout: &bytes.Buffer{}}).runDoctor(nil))
|
||||
|
||||
@ -1,96 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/crawlkit/control"
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) runMetadata(args []string) error {
|
||||
fs := flag.NewFlagSet("metadata", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
jsonOut := fs.Bool("json", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("metadata takes flags only"))
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
cfg := config.Default()
|
||||
manifest := control.NewManifest("discrawl", "Discord Crawl", "discrawl")
|
||||
manifest.Description = "Local-first Discord archive crawler."
|
||||
manifest.Branding = control.Branding{SymbolName: "bubble.left.and.bubble.right.fill", AccentColor: "#5865f2", BundleIdentifier: "com.hnc.Discord"}
|
||||
manifest.Paths = control.Paths{
|
||||
DefaultConfig: config.ResolvePath(""),
|
||||
ConfigEnv: config.DefaultConfigEnv,
|
||||
DefaultDatabase: cfg.DBPath,
|
||||
DefaultCache: cfg.CacheDir,
|
||||
DefaultLogs: cfg.LogDir,
|
||||
DefaultShare: cfg.Share.RepoPath,
|
||||
}
|
||||
manifest.Capabilities = []string{"metadata", "status", "doctor", "sync", "tap", "tui", "git-share", "sql", "embeddings"}
|
||||
manifest.Privacy = control.Privacy{ContainsPrivateMessages: true, ExportsSecrets: false, LocalOnlyScopes: []string{"discord", "desktop-cache", "sqlite", "git-share"}}
|
||||
manifest.Commands = map[string]control.Command{
|
||||
"status": {Title: "Status", Argv: []string{"discrawl", "status", "--json"}, JSON: true},
|
||||
"doctor": {Title: "Doctor", Argv: []string{"discrawl", "doctor", "--json"}, JSON: true},
|
||||
"sync": {Title: "Sync", Argv: []string{"discrawl", "--json", "sync"}, JSON: true, Mutates: true},
|
||||
"tap": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "tap"}, JSON: true, Mutates: true},
|
||||
"cache-import": {Title: "Import desktop cache", Argv: []string{"discrawl", "--json", "cache-import"}, JSON: true, Mutates: true},
|
||||
"wiretap": {Title: "Legacy desktop cache import", Argv: []string{"discrawl", "--json", "wiretap"}, JSON: true, Mutates: true, Legacy: true, Deprecated: true},
|
||||
"tui": {Title: "Terminal browser", Argv: []string{"discrawl", "tui"}},
|
||||
"tui-json": {Title: "Terminal browser rows", Argv: []string{"discrawl", "tui", "--json"}, JSON: true},
|
||||
"publish": {Title: "Publish share", Argv: []string{"discrawl", "--json", "publish"}, JSON: true, Mutates: true},
|
||||
"subscribe": {Title: "Subscribe share", Argv: []string{"discrawl", "--json", "subscribe"}, JSON: true, Mutates: true},
|
||||
"update": {Title: "Update share", Argv: []string{"discrawl", "--json", "update"}, JSON: true, Mutates: true},
|
||||
}
|
||||
return r.print(manifest)
|
||||
}
|
||||
|
||||
func controlStatus(configPath string, cfg config.Config, status store.Status, shareNeedsUpdate bool) control.Status {
|
||||
counts := []control.Count{
|
||||
control.NewCount("guilds", "Guilds", int64(status.GuildCount)),
|
||||
control.NewCount("channels", "Channels", int64(status.ChannelCount)),
|
||||
control.NewCount("threads", "Threads", int64(status.ThreadCount)),
|
||||
control.NewCount("messages", "Messages", int64(status.MessageCount)),
|
||||
control.NewCount("members", "Members", int64(status.MemberCount)),
|
||||
control.NewCount("embedding_backlog", "Embedding backlog", int64(status.EmbeddingBacklog)),
|
||||
}
|
||||
out := control.NewStatus("discrawl", fmt.Sprintf("%d messages across %d channels", status.MessageCount, status.ChannelCount))
|
||||
out.State = "current"
|
||||
out.ConfigPath = configPath
|
||||
out.DatabasePath = status.DBPath
|
||||
out.Counts = counts
|
||||
if !status.LastSyncAt.IsZero() {
|
||||
out.LastSyncAt = status.LastSyncAt.UTC().Format(time.RFC3339)
|
||||
}
|
||||
db := control.SQLiteDatabase("primary", "Discord archive", "archive", status.DBPath, true, counts)
|
||||
out.DatabaseBytes = db.Bytes
|
||||
out.WALBytes = fileSize(status.DBPath + "-wal")
|
||||
out.Databases = []control.Database{db}
|
||||
out.Share = &control.Share{
|
||||
Enabled: cfg.ShareEnabled(),
|
||||
RepoPath: cfg.Share.RepoPath,
|
||||
Remote: cfg.Share.Remote,
|
||||
Branch: cfg.Share.Branch,
|
||||
NeedsUpdate: shareNeedsUpdate,
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// fileSize returns the size in bytes reported by os.Stat for path. Any stat
// failure (for example a missing file) yields 0 rather than an error.
func fileSize(path string) int64 {
	if st, err := os.Stat(path); err == nil {
		return st.Size()
	}
	return 0
}
|
||||
@ -1,73 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
)
|
||||
|
||||
func (r *runtime) runDigest(args []string) error {
|
||||
fs := flag.NewFlagSet("digest", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
since := fs.String("since", "7d", "")
|
||||
guild := fs.String("guild", "", "")
|
||||
channel := fs.String("channel", "", "")
|
||||
topN := fs.Int("top-n", 3, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("digest takes no positional arguments"))
|
||||
}
|
||||
|
||||
lookback, err := parseLookback(*since)
|
||||
if err != nil {
|
||||
return usageErr(fmt.Errorf("parse --since: %w", err))
|
||||
}
|
||||
guildID := strings.TrimSpace(*guild)
|
||||
if guildID == "" {
|
||||
guildID = r.cfg.EffectiveDefaultGuildID()
|
||||
}
|
||||
|
||||
digest, err := report.BuildDigest(r.ctx, r.store, report.DigestOptions{
|
||||
Since: lookback,
|
||||
GuildID: guildID,
|
||||
Channel: *channel,
|
||||
TopN: *topN,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(digest)
|
||||
}
|
||||
|
||||
// parseLookback converts a lookback expression into a non-negative duration.
//
// Two forms are accepted after trimming surrounding whitespace:
//   - "<N>d", a whole number of days (for example "7d");
//   - anything time.ParseDuration understands (for example "72h" or "30m").
//
// It returns an error for empty input, a malformed value, a negative value, or
// a day count too large to represent as a time.Duration.
func parseLookback(value string) (time.Duration, error) {
	// Largest day count whose duration still fits in an int64 of nanoseconds.
	const maxDays = int64(1<<63-1) / int64(24*time.Hour)

	value = strings.TrimSpace(value)
	if value == "" {
		return 0, errors.New("empty duration")
	}

	// time.ParseDuration has no day unit, so the "d" suffix is handled here.
	if daysValue, ok := strings.CutSuffix(value, "d"); ok {
		days, err := strconv.Atoi(daysValue)
		if err != nil {
			return 0, fmt.Errorf("invalid day count: %w", err)
		}
		if days < 0 {
			return 0, errors.New("negative duration")
		}
		// Without this bound the multiplication below wraps around silently
		// and can yield a negative duration with a nil error.
		if int64(days) > maxDays {
			return 0, errors.New("duration too large")
		}
		return time.Duration(days) * 24 * time.Hour, nil
	}

	d, err := time.ParseDuration(value)
	if err != nil {
		return 0, err
	}
	if d < 0 {
		return 0, errors.New("negative duration")
	}
	return d, nil
}
|
||||
@ -1,168 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestParseLookback(t *testing.T) {
|
||||
cases := []struct {
|
||||
in string
|
||||
want time.Duration
|
||||
err bool
|
||||
}{
|
||||
{"7d", 7 * 24 * time.Hour, false},
|
||||
{"30d", 30 * 24 * time.Hour, false},
|
||||
{"72h", 72 * time.Hour, false},
|
||||
{"30m", 30 * time.Minute, false},
|
||||
{"", 0, true},
|
||||
{"abc", 0, true},
|
||||
{"-2d", 0, true},
|
||||
{"-1h", 0, true},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
d, err := parseLookback(tc.in)
|
||||
if tc.err {
|
||||
require.Error(t, err, tc.in)
|
||||
continue
|
||||
}
|
||||
require.NoError(t, err, tc.in)
|
||||
require.Equal(t, tc.want, d, tc.in)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDigestCommand(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cfgPath := filepath.Join(dir, "config.toml")
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
|
||||
require.NoError(t, seedDigestCLIStore(ctx, dbPath))
|
||||
|
||||
cfg := config.Default()
|
||||
cfg.DBPath = dbPath
|
||||
cfg.DefaultGuildID = "g1"
|
||||
require.NoError(t, config.Write(cfgPath, cfg))
|
||||
|
||||
t.Run("since 7d happy path", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "digest", "--since", "7d"}, &out, &bytes.Buffer{}))
|
||||
require.Contains(t, out.String(), "general (text)")
|
||||
require.Contains(t, out.String(), "Window:")
|
||||
require.Contains(t, out.String(), "Totals: messages=")
|
||||
})
|
||||
|
||||
t.Run("json output", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "digest", "--since", "7d"}, &out, &bytes.Buffer{}))
|
||||
var payload map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
|
||||
require.Equal(t, "7d", payload["window_label"])
|
||||
require.InEpsilon(t, 3, payload["top_n"], 0.001)
|
||||
totals, ok := payload["totals"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.InEpsilon(t, 2, totals["messages"], 0.001)
|
||||
require.Contains(t, totals, "replies")
|
||||
require.NotContains(t, totals, "threads")
|
||||
})
|
||||
|
||||
t.Run("channel name filter", func(t *testing.T) {
|
||||
var out bytes.Buffer
|
||||
require.NoError(t, Run(ctx, []string{"--config", cfgPath, "--json", "digest", "--channel", "incidents", "--since", "7d"}, &out, &bytes.Buffer{}))
|
||||
var payload map[string]any
|
||||
require.NoError(t, json.Unmarshal(out.Bytes(), &payload))
|
||||
channels, ok := payload["channels"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, channels, 1)
|
||||
channel := channels[0].(map[string]any)
|
||||
require.Equal(t, "incidents", channel["channel_name"])
|
||||
})
|
||||
|
||||
t.Run("unknown flag fails", func(t *testing.T) {
|
||||
err := Run(ctx, []string{"--config", cfgPath, "digest", "--bogus"}, &bytes.Buffer{}, &bytes.Buffer{})
|
||||
require.Error(t, err)
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
})
|
||||
|
||||
t.Run("no positional args allowed", func(t *testing.T) {
|
||||
err := Run(ctx, []string{"--config", cfgPath, "digest", "extra"}, &bytes.Buffer{}, &bytes.Buffer{})
|
||||
require.Error(t, err)
|
||||
require.Equal(t, 2, ExitCode(err))
|
||||
})
|
||||
}
|
||||
|
||||
func seedDigestCLIStore(ctx context.Context, path string) error {
|
||||
s, err := store.Open(ctx, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
now := time.Now().UTC()
|
||||
if err := s.UpsertGuild(ctx, store.GuildRecord{ID: "g1", Name: "Guild", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertChannel(ctx, store.ChannelRecord{ID: "c2", GuildID: "g1", Kind: "text", Name: "incidents", RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertMember(ctx, store.MemberRecord{GuildID: "g1", UserID: "u1", Username: "alice", DisplayName: "Alice", RoleIDsJSON: `[]`, RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.UpsertMember(ctx, store.MemberRecord{GuildID: "g1", UserID: "u2", Username: "bob", DisplayName: "Bob", RoleIDsJSON: `[]`, RawJSON: `{}`}); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.UpsertMessages(ctx, []store.MessageMutation{
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
ChannelName: "general",
|
||||
AuthorID: "u1",
|
||||
AuthorName: "Alice",
|
||||
MessageType: 0,
|
||||
CreatedAt: now.Add(-2 * time.Hour).Format(time.RFC3339Nano),
|
||||
Content: "hello",
|
||||
NormalizedContent: "hello",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
Mentions: []store.MentionEventRecord{{
|
||||
MessageID: "m1",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c1",
|
||||
AuthorID: "u1",
|
||||
TargetType: "user",
|
||||
TargetID: "u2",
|
||||
TargetName: "Bob",
|
||||
EventAt: now.Add(-2 * time.Hour).Format(time.RFC3339Nano),
|
||||
}},
|
||||
},
|
||||
{
|
||||
Record: store.MessageRecord{
|
||||
ID: "m2",
|
||||
GuildID: "g1",
|
||||
ChannelID: "c2",
|
||||
ChannelName: "incidents",
|
||||
AuthorID: "u2",
|
||||
AuthorName: "Bob",
|
||||
MessageType: 0,
|
||||
CreatedAt: now.Add(-90 * time.Minute).Format(time.RFC3339Nano),
|
||||
Content: "incident",
|
||||
NormalizedContent: "incident",
|
||||
RawJSON: `{}`,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
@ -1,146 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
// defaultDMLast is the default value of both the --limit and --last flags of
// the dms command.
const defaultDMLast = 50
|
||||
|
||||
func (r *runtime) runDirectMessages(args []string) error {
|
||||
fs := flag.NewFlagSet("dms", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
with := fs.String("with", "", "")
|
||||
search := fs.String("search", "", "")
|
||||
hours := fs.Int("hours", 0, "")
|
||||
days := fs.Int("days", 0, "")
|
||||
since := fs.String("since", "", "")
|
||||
before := fs.String("before", "", "")
|
||||
limit := fs.Int("limit", defaultDMLast, "")
|
||||
last := fs.Int("last", defaultDMLast, "")
|
||||
all := fs.Bool("all", false, "")
|
||||
list := fs.Bool("list", false, "")
|
||||
includeEmpty := fs.Bool("include-empty", false, "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("dms takes flags only"))
|
||||
}
|
||||
if *hours < 0 {
|
||||
return usageErr(errors.New("--hours must be >= 0"))
|
||||
}
|
||||
if *days < 0 {
|
||||
return usageErr(errors.New("--days must be >= 0"))
|
||||
}
|
||||
if countNonZero(*hours > 0, *days > 0, strings.TrimSpace(*since) != "") > 1 {
|
||||
return usageErr(errors.New("use only one of --hours, --days, or --since"))
|
||||
}
|
||||
if *limit < 0 {
|
||||
return usageErr(errors.New("--limit must be >= 0"))
|
||||
}
|
||||
if *last < 0 {
|
||||
return usageErr(errors.New("--last must be >= 0"))
|
||||
}
|
||||
if *all && *last > 0 && flagPassed(fs, "last") {
|
||||
return usageErr(errors.New("use either --all or --last"))
|
||||
}
|
||||
if flagPassed(fs, "limit") && flagPassed(fs, "last") {
|
||||
return usageErr(errors.New("use either --limit or --last"))
|
||||
}
|
||||
|
||||
if *list || (strings.TrimSpace(*with) == "" && strings.TrimSpace(*search) == "" && noDMMessageTimeFilter(*hours, *days, *since, *before)) {
|
||||
rows, err := r.store.DirectMessageConversations(r.ctx, store.DirectMessageConversationOptions{With: *with})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(rows)
|
||||
}
|
||||
|
||||
sinceTime, beforeTime, err := r.parseMessageWindow(*hours, *days, *since, *before)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if query := strings.TrimSpace(*search); query != "" {
|
||||
opts := store.SearchOptions{
|
||||
Query: query,
|
||||
GuildIDs: []string{store.DirectMessageGuildID},
|
||||
Channel: *with,
|
||||
Limit: *limit,
|
||||
IncludeEmpty: *includeEmpty,
|
||||
}
|
||||
results, err := r.store.SearchMessages(r.ctx, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(results)
|
||||
}
|
||||
|
||||
messageLimit := *limit
|
||||
messageLast := *last
|
||||
switch {
|
||||
case *all:
|
||||
messageLimit = 0
|
||||
messageLast = 0
|
||||
case flagPassed(fs, "limit"):
|
||||
messageLast = 0
|
||||
default:
|
||||
messageLimit = 0
|
||||
}
|
||||
rows, err := r.store.ListMessages(r.ctx, store.MessageListOptions{
|
||||
GuildIDs: []string{store.DirectMessageGuildID},
|
||||
Channel: *with,
|
||||
Since: sinceTime,
|
||||
Before: beforeTime,
|
||||
Limit: messageLimit,
|
||||
Last: messageLast,
|
||||
IncludeEmpty: *includeEmpty,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(rows)
|
||||
}
|
||||
|
||||
func (r *runtime) parseMessageWindow(hours, days int, since, before string) (time.Time, time.Time, error) {
|
||||
var sinceTime time.Time
|
||||
var beforeTime time.Time
|
||||
var err error
|
||||
if hours > 0 {
|
||||
now := time.Now().UTC()
|
||||
if r.now != nil {
|
||||
now = r.now().UTC()
|
||||
}
|
||||
sinceTime = now.Add(-time.Duration(hours) * time.Hour)
|
||||
}
|
||||
if days > 0 {
|
||||
now := time.Now().UTC()
|
||||
if r.now != nil {
|
||||
now = r.now().UTC()
|
||||
}
|
||||
sinceTime = now.Add(-time.Duration(days) * 24 * time.Hour)
|
||||
}
|
||||
if strings.TrimSpace(since) != "" {
|
||||
sinceTime, err = time.Parse(time.RFC3339, since)
|
||||
if err != nil {
|
||||
return time.Time{}, time.Time{}, usageErr(fmt.Errorf("invalid --since: %w", err))
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(before) != "" {
|
||||
beforeTime, err = time.Parse(time.RFC3339, before)
|
||||
if err != nil {
|
||||
return time.Time{}, time.Time{}, usageErr(fmt.Errorf("invalid --before: %w", err))
|
||||
}
|
||||
}
|
||||
return sinceTime, beforeTime, nil
|
||||
}
|
||||
|
||||
// noDMMessageTimeFilter reports whether none of the time-window inputs is set:
// hours and days are both zero and since and before are blank after trimming.
func noDMMessageTimeFilter(hours, days int, since, before string) bool {
	if hours != 0 || days != 0 {
		return false
	}
	for _, bound := range []string{since, before} {
		if strings.TrimSpace(bound) != "" {
			return false
		}
	}
	return true
}
|
||||
@ -18,7 +18,7 @@ func TestDockerGitSourceSmoke(t *testing.T) {
|
||||
t.Skip("docker is not installed")
|
||||
}
|
||||
root := repoRoot(t)
|
||||
cmd := exec.CommandContext(t.Context(), "bash", filepath.Join(root, "scripts", "docker-git-source-smoke.sh"))
|
||||
cmd := exec.Command("bash", filepath.Join(root, "scripts", "docker-git-source-smoke.sh"))
|
||||
cmd.Dir = root
|
||||
out, err := cmd.CombinedOutput()
|
||||
require.NoError(t, err, string(out))
|
||||
@ -26,7 +26,7 @@ func TestDockerGitSourceSmoke(t *testing.T) {
|
||||
|
||||
func repoRoot(t *testing.T) string {
|
||||
t.Helper()
|
||||
cmd := exec.CommandContext(t.Context(), "git", "rev-parse", "--show-toplevel")
|
||||
cmd := exec.Command("git", "rev-parse", "--show-toplevel")
|
||||
out, err := cmd.Output()
|
||||
require.NoError(t, err)
|
||||
return strings.TrimSpace(string(out))
|
||||
|
||||
@ -1,12 +1,10 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) resolveSyncGuilds(guild, guilds string) []string {
|
||||
@ -26,7 +24,7 @@ func (r *runtime) resolveSyncGuildsAll(guild, guilds string, all bool) ([]string
|
||||
return r.resolveSyncGuilds(guild, guilds), nil
|
||||
}
|
||||
if len(csvList(guilds)) > 0 || strings.TrimSpace(guild) != "" {
|
||||
return nil, errors.New("use either --all or --guild/--guilds")
|
||||
return nil, fmt.Errorf("use either --all or --guild/--guilds")
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
@ -36,17 +34,6 @@ func (r *runtime) resolveSearchGuilds(guild, guilds string) []string {
|
||||
return csvList(strings.Join(requested, ","))
|
||||
}
|
||||
|
||||
func directMessageGuildScope(dm bool, guild, guilds string) ([]string, error) {
|
||||
if !dm {
|
||||
requested := append(csvList(guilds), strings.TrimSpace(guild))
|
||||
return csvList(strings.Join(requested, ",")), nil
|
||||
}
|
||||
if len(csvList(guilds)) > 0 || strings.TrimSpace(guild) != "" {
|
||||
return nil, errors.New("use either --dm or --guild/--guilds")
|
||||
}
|
||||
return []string{store.DirectMessageGuildID}, nil
|
||||
}
|
||||
|
||||
func csvList(raw string) []string {
|
||||
if raw == "" {
|
||||
return nil
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) runMentions(args []string) error {
|
||||
@ -28,19 +27,19 @@ func (r *runtime) runMentions(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("mentions takes flags only"))
|
||||
return usageErr(fmt.Errorf("mentions takes flags only"))
|
||||
}
|
||||
if *days < 0 {
|
||||
return usageErr(errors.New("--days must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--days must be >= 0"))
|
||||
}
|
||||
if *days > 0 && strings.TrimSpace(*since) != "" {
|
||||
return usageErr(errors.New("use either --days or --since"))
|
||||
return usageErr(fmt.Errorf("use either --days or --since"))
|
||||
}
|
||||
if *limit < 0 {
|
||||
return usageErr(errors.New("--limit must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--limit must be >= 0"))
|
||||
}
|
||||
if targetTypeValue := strings.TrimSpace(*targetType); targetTypeValue != "" && targetTypeValue != "user" && targetTypeValue != "role" {
|
||||
return usageErr(errors.New("--type must be user or role"))
|
||||
return usageErr(fmt.Errorf("--type must be user or role"))
|
||||
}
|
||||
|
||||
var sinceTime time.Time
|
||||
@ -74,7 +73,7 @@ func (r *runtime) runMentions(args []string) error {
|
||||
sinceTime.IsZero() &&
|
||||
beforeTime.IsZero() &&
|
||||
len(guildIDs) == 0 {
|
||||
return usageErr(errors.New("mentions needs at least one filter"))
|
||||
return usageErr(fmt.Errorf("mentions needs at least one filter"))
|
||||
}
|
||||
|
||||
rows, err := r.store.ListMentions(r.ctx, store.MentionListOptions{
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
const defaultMessageLimit = 200
|
||||
@ -27,36 +26,35 @@ func (r *runtime) runMessages(args []string) error {
|
||||
all := fs.Bool("all", false, "")
|
||||
syncNow := fs.Bool("sync", false, "")
|
||||
includeEmpty := fs.Bool("include-empty", false, "")
|
||||
dm := fs.Bool("dm", false, "")
|
||||
guildsFlag := fs.String("guilds", "", "")
|
||||
guildFlag := fs.String("guild", "", "")
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("messages takes flags only"))
|
||||
return usageErr(fmt.Errorf("messages takes flags only"))
|
||||
}
|
||||
if *hours < 0 {
|
||||
return usageErr(errors.New("--hours must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--hours must be >= 0"))
|
||||
}
|
||||
if *days < 0 {
|
||||
return usageErr(errors.New("--days must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--days must be >= 0"))
|
||||
}
|
||||
if countNonZero(*hours > 0, *days > 0, strings.TrimSpace(*since) != "") > 1 {
|
||||
return usageErr(errors.New("use only one of --hours, --days, or --since"))
|
||||
return usageErr(fmt.Errorf("use only one of --hours, --days, or --since"))
|
||||
}
|
||||
if *limit < 0 {
|
||||
return usageErr(errors.New("--limit must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--limit must be >= 0"))
|
||||
}
|
||||
if *last < 0 {
|
||||
return usageErr(errors.New("--last must be >= 0"))
|
||||
return usageErr(fmt.Errorf("--last must be >= 0"))
|
||||
}
|
||||
limitSet := flagPassed(fs, "limit")
|
||||
if *all && *last > 0 {
|
||||
return usageErr(errors.New("use either --all or --last"))
|
||||
return usageErr(fmt.Errorf("use either --all or --last"))
|
||||
}
|
||||
if limitSet && *last > 0 {
|
||||
return usageErr(errors.New("use either --limit or --last"))
|
||||
return usageErr(fmt.Errorf("use either --limit or --last"))
|
||||
}
|
||||
if *last > 0 {
|
||||
*limit = 0
|
||||
@ -92,15 +90,9 @@ func (r *runtime) runMessages(args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
guildIDs, err := directMessageGuildScope(*dm, *guildFlag, *guildsFlag)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if *dm && *syncNow {
|
||||
return usageErr(errors.New("messages --sync is not supported with --dm; run wiretap or sync --source wiretap first"))
|
||||
}
|
||||
guildIDs := r.resolveSearchGuilds(*guildFlag, *guildsFlag)
|
||||
if strings.TrimSpace(*channel) == "" && strings.TrimSpace(*author) == "" && sinceTime.IsZero() && beforeTime.IsZero() && len(guildIDs) == 0 {
|
||||
return usageErr(errors.New("messages needs at least one filter"))
|
||||
return usageErr(fmt.Errorf("messages needs at least one filter"))
|
||||
}
|
||||
if *all {
|
||||
*limit = 0
|
||||
|
||||
@ -2,19 +2,16 @@ package cli
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/discorddesktop"
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/discorddesktop"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
func (r *runtime) print(value any) error {
|
||||
@ -61,35 +58,13 @@ func printPlain(w io.Writer, value any) error {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.MessageID, row.Content)
|
||||
}
|
||||
return nil
|
||||
case []store.DirectMessageConversationRow:
|
||||
for _, row := range v {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%d\t%d\t%s\t%s\n", row.ChannelID, row.Name, row.MessageCount, row.AuthorCount, formatTime(row.FirstMessageAt), formatTime(row.LastMessageAt))
|
||||
}
|
||||
return nil
|
||||
case []store.MentionRow:
|
||||
for _, row := range v {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", formatTime(row.CreatedAt), row.GuildID, row.ChannelID, row.AuthorID, row.TargetType, row.TargetID, row.Content)
|
||||
}
|
||||
return nil
|
||||
case report.Digest:
|
||||
for _, row := range v.Channels {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%d\t%d\t%d\n", row.ChannelID, row.ChannelName, row.Kind, row.GuildID, row.Messages, row.Replies, row.ActiveAuthors)
|
||||
}
|
||||
return nil
|
||||
case report.Quiet:
|
||||
for _, row := range v.Channels {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\n", row.ChannelID, row.ChannelName, row.Kind, row.GuildID, row.LastMessage, row.DaysSilent)
|
||||
}
|
||||
return nil
|
||||
case report.Trends:
|
||||
for _, row := range v.Rows {
|
||||
for _, week := range row.Weekly {
|
||||
_, _ = fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\n", row.GuildID, row.ChannelID, row.ChannelName, row.Kind, formatTime(week.WeekStart), week.Messages)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
return errors.New("no plain printer")
|
||||
return fmt.Errorf("no plain printer")
|
||||
}
|
||||
}
|
||||
|
||||
@ -100,20 +75,12 @@ Usage:
|
||||
discrawl [global flags] <command> [args]
|
||||
|
||||
Commands:
|
||||
metadata
|
||||
version
|
||||
init
|
||||
sync
|
||||
tail
|
||||
tap
|
||||
cache-import
|
||||
wiretap
|
||||
search
|
||||
tui
|
||||
messages
|
||||
digest
|
||||
analytics
|
||||
dms
|
||||
mentions
|
||||
embed
|
||||
sql
|
||||
@ -147,8 +114,8 @@ func printHuman(w io.Writer, value any) error {
|
||||
}
|
||||
}
|
||||
if v.Wiretap != nil {
|
||||
if _, err := fmt.Fprintf(w, "wiretap_visited=%d\nwiretap_files=%d\nwiretap_unchanged=%d\nwiretap_fast_skipped=%d\nwiretap_messages=%d\nwiretap_dm_messages=%d\nwiretap_dm_channels=%d\nwiretap_guild_messages=%d\nwiretap_skipped_messages=%d\nwiretap_skipped_channels=%d\nwiretap_checkpoints=%d\n",
|
||||
v.Wiretap.FilesVisited, v.Wiretap.FilesScanned, v.Wiretap.FilesUnchanged, v.Wiretap.CacheFilesFastSkipped, v.Wiretap.Messages, v.Wiretap.DMMessages, v.Wiretap.DMChannels, v.Wiretap.GuildMessages, v.Wiretap.SkippedMessages, v.Wiretap.SkippedChannels, v.Wiretap.Checkpoints); err != nil {
|
||||
if _, err := fmt.Fprintf(w, "wiretap_messages=%d\nwiretap_dm_messages=%d\nwiretap_dm_channels=%d\nwiretap_guild_messages=%d\nwiretap_skipped_messages=%d\nwiretap_skipped_channels=%d\n",
|
||||
v.Wiretap.Messages, v.Wiretap.DMMessages, v.Wiretap.DMChannels, v.Wiretap.GuildMessages, v.Wiretap.SkippedMessages, v.Wiretap.SkippedChannels); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@ -157,8 +124,8 @@ func printHuman(w io.Writer, value any) error {
|
||||
_, err := fmt.Fprintf(w, "guilds=%d channels=%d threads=%d members=%d messages=%d\n", v.Guilds, v.Channels, v.Threads, v.Members, v.Messages)
|
||||
return err
|
||||
case discorddesktop.Stats:
|
||||
_, err := fmt.Fprintf(w, "path=%s\nvisited=%d\nfiles=%d\nskipped=%d\nunchanged=%d\nfast_skipped=%d\nobjects=%d\nguilds=%d\nchannels=%d\nmessages=%d\ndm_messages=%d\ndm_channels=%d\nguild_messages=%d\nskipped_messages=%d\nskipped_channels=%d\ncheckpoints=%d\nfull_cache=%t\ndry_run=%t\n",
|
||||
v.Path, v.FilesVisited, v.FilesScanned, v.FilesSkipped, v.FilesUnchanged, v.CacheFilesFastSkipped, v.JSONObjects, v.Guilds, v.Channels, v.Messages, v.DMMessages, v.DMChannels, v.GuildMessages, v.SkippedMessages, v.SkippedChannels, v.Checkpoints, v.FullCache, v.DryRun)
|
||||
_, err := fmt.Fprintf(w, "path=%s\nfiles=%d\nskipped=%d\nobjects=%d\nguilds=%d\nchannels=%d\nmessages=%d\ndm_messages=%d\ndm_channels=%d\nguild_messages=%d\nskipped_messages=%d\nskipped_channels=%d\ndry_run=%t\n",
|
||||
v.Path, v.FilesScanned, v.FilesSkipped, v.JSONObjects, v.Guilds, v.Channels, v.Messages, v.DMMessages, v.DMChannels, v.GuildMessages, v.SkippedMessages, v.SkippedChannels, v.DryRun)
|
||||
return err
|
||||
case store.Status:
|
||||
_, err := fmt.Fprintf(w, "db=%s\nguilds=%d\nchannels=%d\nthreads=%d\nmessages=%d\nmembers=%d\nembedding_backlog=%d\nlast_sync=%s\nlast_tail_event=%s\n",
|
||||
@ -194,20 +161,6 @@ func printHuman(w io.Writer, value any) error {
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case []store.DirectMessageConversationRow:
|
||||
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
|
||||
_, _ = fmt.Fprintln(tw, "CHANNEL\tNAME\tMESSAGES\tAUTHORS\tFIRST\tLAST")
|
||||
for _, row := range v {
|
||||
_, _ = fmt.Fprintf(tw, "%s\t%s\t%d\t%d\t%s\t%s\n",
|
||||
row.ChannelID,
|
||||
row.Name,
|
||||
row.MessageCount,
|
||||
row.AuthorCount,
|
||||
formatTime(row.FirstMessageAt),
|
||||
formatTime(row.LastMessageAt),
|
||||
)
|
||||
}
|
||||
return tw.Flush()
|
||||
case []store.MentionRow:
|
||||
for _, row := range v {
|
||||
if _, err := fmt.Fprintf(w, "[%s/%s] %s -> %s:%s %s\n%s\n\n", row.GuildID, row.ChannelName, row.AuthorName, row.TargetType, firstNonEmpty(row.TargetName, row.TargetID), formatTime(row.CreatedAt), row.Content); err != nil {
|
||||
@ -300,74 +253,6 @@ func printHuman(w io.Writer, value any) error {
|
||||
_, _ = fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n", row.GuildID, row.ID, row.Kind, row.Name)
|
||||
}
|
||||
return tw.Flush()
|
||||
case report.Digest:
|
||||
for _, channel := range v.Channels {
|
||||
if _, err := fmt.Fprintf(w, "%s (%s)\n", channel.ChannelName, firstNonEmpty(channel.Kind, "unknown")); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, " messages=%d replies=%d authors=%d\n", channel.Messages, channel.Replies, channel.ActiveAuthors); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, " top posters %s\n", formatRankedCounts(channel.TopPosters)); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, " top mentions %s\n\n", formatRankedCounts(channel.TopMentions)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, "Window: %s to %s (%s)\n", formatTime(v.Since), formatTime(v.Until), v.WindowLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := fmt.Fprintf(w, "Totals: messages=%d replies=%d channels=%d authors=%d\n", v.Totals.Messages, v.Totals.Replies, v.Totals.Channels, v.Totals.ActiveAuthors)
|
||||
return err
|
||||
case report.Quiet:
|
||||
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
|
||||
_, _ = fmt.Fprintln(tw, "CHANNEL\tKIND\tLAST MESSAGE\tDAYS SILENT")
|
||||
for _, row := range v.Channels {
|
||||
_, _ = fmt.Fprintf(tw, "%s\t%s\t%s\t%s\n",
|
||||
row.ChannelName,
|
||||
firstNonEmpty(row.Kind, "unknown"),
|
||||
firstNonEmpty(row.LastMessage, "never"),
|
||||
formatDaysSilent(row.DaysSilent),
|
||||
)
|
||||
}
|
||||
if err := tw.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, "\nWindow: %s to %s (%s)\n", formatTime(v.Since), formatTime(v.Until), formatWindowDuration(v.Until.Sub(v.Since))); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := fmt.Fprintf(w, "Totals: channels=%d\n", v.Totals.Channels)
|
||||
return err
|
||||
case report.Trends:
|
||||
tw := tabwriter.NewWriter(w, 2, 4, 2, ' ', 0)
|
||||
header := []string{"CHANNEL", "KIND", "TOTAL"}
|
||||
weekStarts := make([]time.Time, 0, v.Weeks)
|
||||
if len(v.Rows) > 0 {
|
||||
for _, week := range v.Rows[0].Weekly {
|
||||
weekStarts = append(weekStarts, week.WeekStart)
|
||||
}
|
||||
} else {
|
||||
for i := range v.Weeks {
|
||||
weekStarts = append(weekStarts, v.Since.AddDate(0, 0, 7*i))
|
||||
}
|
||||
}
|
||||
for _, start := range weekStarts {
|
||||
header = append(header, start.Format(time.DateOnly))
|
||||
}
|
||||
_, _ = fmt.Fprintln(tw, strings.Join(header, "\t"))
|
||||
for _, row := range v.Rows {
|
||||
cols := []string{row.ChannelName, firstNonEmpty(row.Kind, "unknown"), strconv.Itoa(trendsRowTotal(row.Weekly))}
|
||||
for _, week := range row.Weekly {
|
||||
cols = append(cols, strconv.Itoa(week.Messages))
|
||||
}
|
||||
_, _ = fmt.Fprintln(tw, strings.Join(cols, "\t"))
|
||||
}
|
||||
if err := tw.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := fmt.Fprintf(w, "\nWindow: %s to %s (%d weeks)\n", formatTime(v.Since), formatTime(v.Until), v.Weeks)
|
||||
return err
|
||||
case map[string]any:
|
||||
keys := make([]string, 0, len(v))
|
||||
for key := range v {
|
||||
@ -381,7 +266,7 @@ func printHuman(w io.Writer, value any) error {
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
return errors.New("no human printer")
|
||||
return fmt.Errorf("no human printer")
|
||||
}
|
||||
}
|
||||
|
||||
@ -416,42 +301,3 @@ func trimForTable(value string) string {
|
||||
}
|
||||
return value[:37] + "..."
|
||||
}
|
||||
|
||||
func formatRankedCounts(rows []report.RankedCount) string {
|
||||
if len(rows) == 0 {
|
||||
return "-"
|
||||
}
|
||||
parts := make([]string, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
parts = append(parts, fmt.Sprintf("%s (%d)", firstNonEmpty(row.Name, "unknown"), row.Count))
|
||||
}
|
||||
return strings.Join(parts, ", ")
|
||||
}
|
||||
|
||||
// formatDaysSilent renders a days-silent count for table output. Negative
// values are shown as "-"; every other value is printed in base 10.
func formatDaysSilent(days int) string {
	if days >= 0 {
		return strconv.Itoa(days)
	}
	return "-"
}
|
||||
|
||||
// formatWindowDuration renders a window length compactly: "0" for zero or
// negative durations, "<n>d" for whole multiples of 24 hours, "<n>h" for
// whole hours, and time.Duration's own String form for anything else.
func formatWindowDuration(d time.Duration) string {
	const day = 24 * time.Hour
	switch {
	case d <= 0:
		return "0"
	case d%day == 0:
		return fmt.Sprintf("%dd", int(d/day))
	case d%time.Hour == 0:
		return fmt.Sprintf("%dh", int(d/time.Hour))
	default:
		return d.String()
	}
}
|
||||
|
||||
func trendsRowTotal(weekly []report.WeeklyCount) int {
|
||||
total := 0
|
||||
for _, row := range weekly {
|
||||
total += row.Messages
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
@ -7,8 +7,8 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
func TestPrintRows(t *testing.T) {
|
||||
|
||||
@ -9,9 +9,9 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/openclaw/crawlkit/embed"
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
"github.com/steipete/discrawl/internal/embed"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) runSearch(args []string) error {
|
||||
@ -22,22 +22,17 @@ func (r *runtime) runSearch(args []string) error {
|
||||
author := fs.String("author", "", "")
|
||||
limit := fs.Int("limit", 20, "")
|
||||
includeEmpty := fs.Bool("include-empty", false, "")
|
||||
dm := fs.Bool("dm", false, "")
|
||||
guildsFlag := fs.String("guilds", "", "")
|
||||
guildFlag := fs.String("guild", "", "")
|
||||
if err := fs.Parse(permuteSearchFlags(args)); err != nil {
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 1 {
|
||||
return usageErr(errors.New("search requires a query"))
|
||||
}
|
||||
guildIDs, err := directMessageGuildScope(*dm, *guildFlag, *guildsFlag)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
return usageErr(fmt.Errorf("search requires a query"))
|
||||
}
|
||||
opts := store.SearchOptions{
|
||||
Query: fs.Arg(0),
|
||||
GuildIDs: guildIDs,
|
||||
GuildIDs: r.resolveSearchGuilds(*guildFlag, *guildsFlag),
|
||||
Channel: *channel,
|
||||
Author: *author,
|
||||
Limit: *limit,
|
||||
@ -67,51 +62,6 @@ func (r *runtime) runSearch(args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
// permuteSearchFlags reorders search arguments so that every recognised flag
// precedes the positional arguments. The standard flag package stops parsing
// at the first non-flag argument, so this lets callers write the query before
// the flags.
//
// Recognised value flags (--mode, --channel, --author, --limit, --guilds,
// --guild) consume the following argument unless written as --name=value.
// Recognised boolean flags are --include-empty and --dm. Anything else,
// including a value flag that is the last argument, is kept as a positional.
//
// A literal "--" ends flag recognition: everything after it is positional.
// The terminator is re-emitted between the flags and the positionals so a
// positional that starts with "-" is not re-interpreted as a flag when the
// result is handed to the flag parser.
func permuteSearchFlags(args []string) []string {
	takesValue := func(name string) bool {
		switch name {
		case "--mode", "--channel", "--author", "--limit", "--guilds", "--guild":
			return true
		}
		return false
	}
	isBool := func(name string) bool {
		switch name {
		case "--include-empty", "--dm":
			return true
		}
		return false
	}

	flags := make([]string, 0, len(args)+1)
	positionals := make([]string, 0, len(args))
	terminated := false
	for i := 0; i < len(args); i++ {
		arg := args[i]
		if arg == "--" {
			terminated = true
			positionals = append(positionals, args[i+1:]...)
			break
		}
		// --name=value form of a known flag stays a single token.
		if name, _, ok := strings.Cut(arg, "="); ok && (takesValue(name) || isBool(name)) {
			flags = append(flags, arg)
			continue
		}
		switch {
		case isBool(arg):
			flags = append(flags, arg)
		case takesValue(arg) && i+1 < len(args):
			// Keep the flag and its value together.
			flags = append(flags, arg, args[i+1])
			i++
		default:
			positionals = append(positionals, arg)
		}
	}
	if terminated {
		// Preserve the terminator so the flag parser honours it too.
		flags = append(flags, "--")
	}
	return append(flags, positionals...)
}
|
||||
|
||||
func (r *runtime) searchMessagesSemantic(opts store.SearchOptions) ([]store.SearchResult, error) {
|
||||
semanticOpts, err := r.semanticSearchOptions(opts)
|
||||
if err != nil {
|
||||
@ -152,12 +102,12 @@ func (r *runtime) searchMessagesHybrid(opts store.SearchOptions) ([]store.Search
|
||||
|
||||
func (r *runtime) semanticSearchOptions(opts store.SearchOptions) (store.SemanticSearchOptions, error) {
|
||||
if !r.cfg.Search.Embeddings.Enabled {
|
||||
return store.SemanticSearchOptions{}, errors.New("embeddings are disabled; enable [search.embeddings] first")
|
||||
return store.SemanticSearchOptions{}, fmt.Errorf("embeddings are disabled; enable [search.embeddings] first")
|
||||
}
|
||||
providerFactory := r.newEmbed
|
||||
if providerFactory == nil {
|
||||
providerFactory = func(cfg config.EmbeddingsConfig) (embed.Provider, error) {
|
||||
return embed.NewProvider(crawlkitEmbeddingConfig(cfg))
|
||||
return embed.NewProvider(cfg)
|
||||
}
|
||||
}
|
||||
provider, err := providerFactory(r.cfg.Search.Embeddings)
|
||||
@ -203,7 +153,7 @@ func (r *runtime) runSQL(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if *confirm && !*unsafe {
|
||||
return usageErr(errors.New("--confirm requires --unsafe"))
|
||||
return usageErr(fmt.Errorf("--confirm requires --unsafe"))
|
||||
}
|
||||
|
||||
var query string
|
||||
@ -229,7 +179,7 @@ func (r *runtime) runSQL(args []string) error {
|
||||
return printRows(r.stdout, cols, rows)
|
||||
}
|
||||
if !*confirm {
|
||||
return usageErr(errors.New("--unsafe requires --confirm"))
|
||||
return usageErr(fmt.Errorf("--unsafe requires --confirm"))
|
||||
}
|
||||
|
||||
if store.IsReadOnlySQL(query) {
|
||||
@ -252,7 +202,7 @@ func (r *runtime) runSQL(args []string) error {
|
||||
|
||||
func (r *runtime) runMembers(args []string) error {
|
||||
if len(args) == 0 {
|
||||
return usageErr(errors.New("members requires a subcommand"))
|
||||
return usageErr(fmt.Errorf("members requires a subcommand"))
|
||||
}
|
||||
switch args[0] {
|
||||
case "list":
|
||||
@ -265,7 +215,7 @@ func (r *runtime) runMembers(args []string) error {
|
||||
return r.runMembersShow(args[1:])
|
||||
case "search":
|
||||
if len(args) < 2 {
|
||||
return usageErr(errors.New("members search requires a query"))
|
||||
return usageErr(fmt.Errorf("members search requires a query"))
|
||||
}
|
||||
rows, err := r.store.Members(r.ctx, "", strings.Join(args[1:], " "), 100)
|
||||
if err != nil {
|
||||
@ -285,7 +235,7 @@ func (r *runtime) runMembersShow(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() < 1 {
|
||||
return usageErr(errors.New("members show requires a user id or query"))
|
||||
return usageErr(fmt.Errorf("members show requires a user id or query"))
|
||||
}
|
||||
query := strings.Join(fs.Args(), " ")
|
||||
|
||||
@ -327,7 +277,7 @@ func (r *runtime) runMembersShow(args []string) error {
|
||||
|
||||
func (r *runtime) runChannels(args []string) error {
|
||||
if len(args) == 0 {
|
||||
return usageErr(errors.New("channels requires a subcommand"))
|
||||
return usageErr(fmt.Errorf("channels requires a subcommand"))
|
||||
}
|
||||
rows, err := r.store.Channels(r.ctx, "")
|
||||
if err != nil {
|
||||
@ -338,7 +288,7 @@ func (r *runtime) runChannels(args []string) error {
|
||||
return r.print(rows)
|
||||
case "show":
|
||||
if len(args) < 2 {
|
||||
return usageErr(errors.New("channels show requires a channel id"))
|
||||
return usageErr(fmt.Errorf("channels show requires a channel id"))
|
||||
}
|
||||
filtered := make([]store.ChannelRow, 0, 1)
|
||||
for _, row := range rows {
|
||||
|
||||
@ -1,17 +1,16 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/syncer"
|
||||
"github.com/steipete/discrawl/internal/syncer"
|
||||
)
|
||||
|
||||
func (r *runtime) syncMessagesQuery(channel, guild, guilds string) error {
|
||||
if r.syncer == nil {
|
||||
return usageErr(errors.New("messages --sync requires Discord access"))
|
||||
return usageErr(fmt.Errorf("messages --sync requires Discord access"))
|
||||
}
|
||||
opts, err := r.messageSyncOptions(channel, guild, guilds)
|
||||
if err != nil {
|
||||
@ -31,7 +30,7 @@ func (r *runtime) messageSyncOptions(channel, guild, guilds string) (syncer.Sync
|
||||
channelFilter := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(channel), "#"))
|
||||
if channelFilter == "" {
|
||||
if len(opts.GuildIDs) == 0 {
|
||||
return opts, errors.New("messages --sync needs --channel or --guild")
|
||||
return opts, fmt.Errorf("messages --sync needs --channel or --guild")
|
||||
}
|
||||
return opts, nil
|
||||
}
|
||||
@ -96,27 +95,3 @@ func hasBoolFlag(args []string, name string) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// boolFlagEnabled reports whether args switches on the boolean flag name.
// A bare occurrence of name counts as enabled, as does name=<value> when the
// value, trimmed and lower-cased, is one of 1, t, true, y, yes or on. The
// first enabling occurrence wins; other occurrences are ignored.
func boolFlagEnabled(args []string, name string) bool {
	prefix := name + "="
	for _, arg := range args {
		if arg == name {
			return true
		}
		if !strings.HasPrefix(arg, prefix) {
			continue
		}
		value := strings.ToLower(strings.TrimSpace(arg[len(prefix):]))
		if value == "1" || value == "t" || value == "true" ||
			value == "y" || value == "yes" || value == "on" {
			return true
		}
	}
	return false
}
|
||||
|
||||
// hasHelpArg reports whether any argument is exactly "help", "--help" or
// "-h".
func hasHelpArg(args []string) bool {
	for i := range args {
		switch args[i] {
		case "help", "--help", "-h":
			return true
		}
	}
	return false
}
|
||||
|
||||
@ -8,8 +8,8 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestMessageSyncOptionsNumericChannelID(t *testing.T) {
|
||||
|
||||
@ -1,11 +1,11 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
"github.com/steipete/discrawl/internal/report"
|
||||
)
|
||||
|
||||
func (r *runtime) runReport(args []string) error {
|
||||
@ -16,7 +16,7 @@ func (r *runtime) runReport(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("report takes no positional arguments"))
|
||||
return usageErr(fmt.Errorf("report takes no positional arguments"))
|
||||
}
|
||||
activity, err := report.Build(r.ctx, r.store, report.Options{})
|
||||
if err != nil {
|
||||
|
||||
@ -1,17 +1,19 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
"github.com/openclaw/discrawl/internal/report"
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/config"
|
||||
"github.com/steipete/discrawl/internal/report"
|
||||
"github.com/steipete/discrawl/internal/share"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
// defaultShareRemote is the git remote URL used as the share remote when the
// caller does not supply one.
const defaultShareRemote = "https://github.com/openclaw/discord-backup.git"
|
||||
|
||||
func (r *runtime) runPublish(args []string) error {
|
||||
fs := flag.NewFlagSet("publish", flag.ContinueOnError)
|
||||
fs.SetOutput(io.Discard)
|
||||
@ -27,7 +29,7 @@ func (r *runtime) runPublish(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("publish takes no positional arguments"))
|
||||
return usageErr(fmt.Errorf("publish takes no positional arguments"))
|
||||
}
|
||||
opts, err := shareOptionsFromFlags(*repoPath, *remote, *branch)
|
||||
if err != nil {
|
||||
@ -96,10 +98,13 @@ func (r *runtime) runSubscribe(args []string) error {
|
||||
if err := fs.Parse(args); err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 1 {
|
||||
return usageErr(errors.New("subscribe requires one remote"))
|
||||
remote := defaultShareRemote
|
||||
if fs.NArg() > 1 {
|
||||
return usageErr(fmt.Errorf("subscribe takes at most one remote"))
|
||||
}
|
||||
if fs.NArg() == 1 {
|
||||
remote = fs.Arg(0)
|
||||
}
|
||||
remote := fs.Arg(0)
|
||||
cfg, err := loadConfigOrDefault(r.configPath)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -125,39 +130,34 @@ func (r *runtime) runSubscribe(args []string) error {
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
r.cfg = cfg
|
||||
return r.withSyncLock(func() error {
|
||||
s, err := store.Open(r.ctx, dbPath)
|
||||
if err != nil {
|
||||
return dbErr(err)
|
||||
}
|
||||
defer func() { _ = s.Close() }()
|
||||
expandedRepo, err := config.ExpandPath(cfg.Share.RepoPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
opts := share.Options{RepoPath: expandedRepo, Remote: cfg.Share.Remote, Branch: cfg.Share.Branch, Progress: r.shareProgress}
|
||||
if *withEmbeddings {
|
||||
applyEmbeddingShareOptions(&opts, cfg)
|
||||
}
|
||||
r.setSyncLockPhase("share pull")
|
||||
if err := share.Pull(r.ctx, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
r.setSyncLockPhase("share import")
|
||||
manifest, imported, err := share.ImportIfChanged(r.ctx, s, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(map[string]any{
|
||||
"config_path": r.configPath,
|
||||
"repo_path": opts.RepoPath,
|
||||
"remote": opts.Remote,
|
||||
"generated_at": manifest.GeneratedAt,
|
||||
"tables": manifest.Tables,
|
||||
"embeddings": manifest.Embeddings,
|
||||
"imported": imported,
|
||||
})
|
||||
s, err := store.Open(r.ctx, dbPath)
|
||||
if err != nil {
|
||||
return dbErr(err)
|
||||
}
|
||||
defer func() { _ = s.Close() }()
|
||||
expandedRepo, err := config.ExpandPath(cfg.Share.RepoPath)
|
||||
if err != nil {
|
||||
return configErr(err)
|
||||
}
|
||||
opts := share.Options{RepoPath: expandedRepo, Remote: cfg.Share.Remote, Branch: cfg.Share.Branch}
|
||||
if *withEmbeddings {
|
||||
applyEmbeddingShareOptions(&opts, cfg)
|
||||
}
|
||||
if err := share.Pull(r.ctx, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
manifest, imported, err := share.ImportIfChanged(r.ctx, s, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return r.print(map[string]any{
|
||||
"config_path": r.configPath,
|
||||
"repo_path": opts.RepoPath,
|
||||
"remote": opts.Remote,
|
||||
"generated_at": manifest.GeneratedAt,
|
||||
"tables": manifest.Tables,
|
||||
"embeddings": manifest.Embeddings,
|
||||
"imported": imported,
|
||||
})
|
||||
}
|
||||
|
||||
@ -172,21 +172,18 @@ func (r *runtime) runUpdate(args []string) error {
|
||||
return usageErr(err)
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("update takes no positional arguments"))
|
||||
return usageErr(fmt.Errorf("update takes no positional arguments"))
|
||||
}
|
||||
opts, err := shareOptionsFromFlags(*repoPath, *remote, *branch)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Progress = r.shareProgress
|
||||
if *withEmbeddings {
|
||||
applyEmbeddingShareOptions(&opts, r.cfg)
|
||||
}
|
||||
r.setSyncLockPhase("share pull")
|
||||
if err := share.Pull(r.ctx, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
r.setSyncLockPhase("share import")
|
||||
manifest, imported, err := share.ImportIfChanged(r.ctx, r.store, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -207,7 +204,7 @@ func shareOptionsFromFlags(repoPath, remote, branch string) (share.Options, erro
|
||||
return share.Options{}, configErr(err)
|
||||
}
|
||||
if remote == "" {
|
||||
return share.Options{}, configErr(errors.New("share remote is required"))
|
||||
remote = defaultShareRemote
|
||||
}
|
||||
if branch == "" {
|
||||
branch = "main"
|
||||
|
||||
@ -1,110 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/share"
|
||||
)
|
||||
|
||||
// shareUpdateMode is the string-valued mode selected by the --update and
// --no-update command-line flags (see syncShareUpdateMode).
type shareUpdateMode string

// Known share update modes. shareUpdateConfigured is only produced by
// boolShareUpdateMode; parseShareUpdateMode does not accept it.
const (
	shareUpdateConfigured shareUpdateMode = "configured"
	shareUpdateAuto       shareUpdateMode = "auto"
	shareUpdateNever      shareUpdateMode = "never"
	shareUpdateForce      shareUpdateMode = "force"
)

// boolShareUpdateMode maps a boolean switch to a mode: true yields
// shareUpdateConfigured and false yields shareUpdateNever.
func boolShareUpdateMode(enabled bool) shareUpdateMode {
	if enabled {
		return shareUpdateConfigured
	}
	return shareUpdateNever
}

// parseShareUpdateMode interprets the value given to --update. Matching
// ignores case and surrounding whitespace, and an empty value means auto.
// Any value other than auto, never or force is an error.
func parseShareUpdateMode(raw string) (shareUpdateMode, error) {
	switch shareUpdateMode(strings.ToLower(strings.TrimSpace(raw))) {
	case "", shareUpdateAuto:
		return shareUpdateAuto, nil
	case shareUpdateNever:
		return shareUpdateNever, nil
	case shareUpdateForce:
		return shareUpdateForce, nil
	default:
		return "", fmt.Errorf("invalid --update %q; use auto, never, or force", raw)
	}
}

// syncShareUpdateMode scans raw arguments for --no-update, "--update VALUE"
// and --update=VALUE and returns the resulting mode. The default is
// shareUpdateNever, and when a flag is repeated the last occurrence wins.
//
// Combining --no-update with an --update value other than never is rejected
// regardless of the order in which the two flags appear. Previously the
// check looked only at the final mode, so "--update force --no-update"
// silently resolved to never while the reverse order was an error.
func syncShareUpdateMode(args []string) (shareUpdateMode, error) {
	mode := shareUpdateNever
	sawNoUpdate := false
	sawConflictingUpdate := false
	for i := 0; i < len(args); i++ {
		arg := args[i]
		var raw string
		switch {
		case arg == "--no-update":
			sawNoUpdate = true
			mode = shareUpdateNever
			continue
		case arg == "--update":
			if i+1 >= len(args) {
				return "", errors.New("--update requires auto, never, or force")
			}
			// The next argument is the flag's value; consume it.
			i++
			raw = args[i]
		case strings.HasPrefix(arg, "--update="):
			raw = strings.TrimPrefix(arg, "--update=")
		default:
			continue
		}
		parsed, err := parseShareUpdateMode(raw)
		if err != nil {
			return "", err
		}
		mode = parsed
		if parsed != shareUpdateNever {
			// Remember the conflict even if a later flag overrides the mode.
			sawConflictingUpdate = true
		}
	}
	if sawNoUpdate && sawConflictingUpdate {
		return "", errors.New("use either --no-update or --update, not both")
	}
	return mode, nil
}
|
||||
|
||||
func (r *runtime) shareProgress(progress share.ImportProgress) {
|
||||
if progress.Phase == "" {
|
||||
return
|
||||
}
|
||||
phase := "share " + progress.Phase
|
||||
if progress.Table != "" {
|
||||
phase += " " + progress.Table
|
||||
}
|
||||
if progress.File != "" {
|
||||
phase += " " + progress.File
|
||||
}
|
||||
r.setSyncLockPhase(phase)
|
||||
attrs := []any{"phase", progress.Phase}
|
||||
if progress.Table != "" {
|
||||
attrs = append(attrs, "table", progress.Table)
|
||||
}
|
||||
if progress.Rows != 0 {
|
||||
attrs = append(attrs, "rows", progress.Rows)
|
||||
}
|
||||
if progress.TotalRows != 0 {
|
||||
attrs = append(attrs, "total_rows", progress.TotalRows)
|
||||
}
|
||||
if progress.File != "" {
|
||||
attrs = append(attrs, "file", progress.File, "file_index", progress.FileIndex, "file_count", progress.FileCount)
|
||||
}
|
||||
r.logger.Info("share import progress", attrs...)
|
||||
}
|
||||
|
||||
func (r *runtime) nowUTC() time.Time {
|
||||
if r.now != nil {
|
||||
return r.now().UTC()
|
||||
}
|
||||
return time.Now().UTC()
|
||||
}
|
||||
@ -1,100 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/config"
|
||||
)
|
||||
|
||||
func (r *runtime) withSyncLock(fn func() error) error {
|
||||
if r.dbLockHeld {
|
||||
return fn()
|
||||
}
|
||||
lockPath, err := r.syncLockPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
release, err := acquireSyncLock(r.ctx, lockPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
r.dbLockHeld = true
|
||||
r.lockStarted = r.nowUTC()
|
||||
r.setSyncLockPhase("locked")
|
||||
defer func() {
|
||||
r.dbLockHeld = false
|
||||
r.lockStarted = time.Time{}
|
||||
_ = release()
|
||||
}()
|
||||
return fn()
|
||||
}
|
||||
|
||||
func (r *runtime) tryWithSyncLock(fn func() error) (bool, error) {
|
||||
if r.dbLockHeld {
|
||||
return true, fn()
|
||||
}
|
||||
lockPath, err := r.syncLockPath()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
release, locked, err := tryAcquireSyncLock(lockPath)
|
||||
if err != nil || !locked {
|
||||
return locked, err
|
||||
}
|
||||
r.dbLockHeld = true
|
||||
r.lockStarted = r.nowUTC()
|
||||
r.setSyncLockPhase("locked")
|
||||
defer func() {
|
||||
r.dbLockHeld = false
|
||||
r.lockStarted = time.Time{}
|
||||
_ = release()
|
||||
}()
|
||||
return true, fn()
|
||||
}
|
||||
|
||||
func (r *runtime) setSyncLockPhase(phase string) {
|
||||
if !r.dbLockHeld {
|
||||
return
|
||||
}
|
||||
path, err := r.syncLockPath()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
started := r.lockStarted
|
||||
if started.IsZero() {
|
||||
started = r.nowUTC()
|
||||
}
|
||||
body := fmt.Sprintf("pid=%d\nstarted_at=%s\nupdated_at=%s\nphase=%s\n",
|
||||
os.Getpid(),
|
||||
started.Format(time.RFC3339Nano),
|
||||
r.nowUTC().Format(time.RFC3339Nano),
|
||||
phase,
|
||||
)
|
||||
_ = os.WriteFile(path, []byte(body), 0o600)
|
||||
}
|
||||
|
||||
func (r *runtime) syncLockPath() (string, error) {
|
||||
dbPath, err := config.ExpandPath(r.cfg.DBPath)
|
||||
if err != nil {
|
||||
return "", configErr(err)
|
||||
}
|
||||
return filepath.Join(filepath.Dir(dbPath), ".discrawl-sync.lock"), nil
|
||||
}
|
||||
|
||||
// syncLockErr builds the error reported when waiting for the sync lock was
// interrupted. It returns nil while ctx is not done. Otherwise it wraps
// ctx.Err() in a message naming the lock path; when the lock file is
// readable and non-blank, its trimmed contents are included with newlines
// replaced by ", ".
func syncLockErr(ctx context.Context, path string) error {
	cause := ctx.Err()
	if cause == nil {
		return nil
	}
	if raw, readErr := os.ReadFile(path); readErr == nil {
		if details := strings.TrimSpace(string(raw)); details != "" {
			holder := strings.ReplaceAll(details, "\n", ", ")
			return fmt.Errorf("wait for sync lock %s (%s): %w", path, holder, cause)
		}
	}
	return fmt.Errorf("wait for sync lock %s: %w", path, cause)
}
|
||||
@ -1,13 +0,0 @@
|
||||
//go:build !unix && !windows
|
||||
|
||||
package cli
|
||||
|
||||
import "context"
|
||||
|
||||
// acquireSyncLock is the variant built when the target is neither unix nor
// windows. It takes no lock and returns a release function that does nothing.
func acquireSyncLock(context.Context, string) (func() error, error) {
	release := func() error { return nil }
	return release, nil
}
|
||||
|
||||
// tryAcquireSyncLock is the variant built when the target is neither unix nor
// windows. It always reports the lock as acquired and returns a release
// function that does nothing.
func tryAcquireSyncLock(string) (func() error, bool, error) {
	release := func() error { return nil }
	return release, true, nil
}
|
||||
@ -1,79 +0,0 @@
|
||||
//go:build unix
|
||||
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func acquireSyncLock(ctx context.Context, path string) (func() error, error) {
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open sync lock: %w", err)
|
||||
}
|
||||
locked := false
|
||||
defer func() {
|
||||
if !locked {
|
||||
_ = file.Close()
|
||||
}
|
||||
}()
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
err = unix.Flock(int(file.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||
if err == nil {
|
||||
locked = true
|
||||
_, _ = file.Seek(0, 0)
|
||||
_ = file.Truncate(0)
|
||||
_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
|
||||
return func() error {
|
||||
unlockErr := unix.Flock(int(file.Fd()), unix.LOCK_UN)
|
||||
closeErr := file.Close()
|
||||
if unlockErr != nil {
|
||||
return unlockErr
|
||||
}
|
||||
return closeErr
|
||||
}, nil
|
||||
}
|
||||
if !errors.Is(err, unix.EWOULDBLOCK) && !errors.Is(err, unix.EAGAIN) {
|
||||
return nil, fmt.Errorf("acquire sync lock: %w", err)
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, syncLockErr(ctx, path)
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func tryAcquireSyncLock(path string) (func() error, bool, error) {
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("open sync lock: %w", err)
|
||||
}
|
||||
err = unix.Flock(int(file.Fd()), unix.LOCK_EX|unix.LOCK_NB)
|
||||
if err != nil {
|
||||
_ = file.Close()
|
||||
if errors.Is(err, unix.EWOULDBLOCK) || errors.Is(err, unix.EAGAIN) {
|
||||
return nil, false, nil
|
||||
}
|
||||
return nil, false, fmt.Errorf("acquire sync lock: %w", err)
|
||||
}
|
||||
_, _ = file.Seek(0, 0)
|
||||
_ = file.Truncate(0)
|
||||
_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
|
||||
return func() error {
|
||||
unlockErr := unix.Flock(int(file.Fd()), unix.LOCK_UN)
|
||||
closeErr := file.Close()
|
||||
if unlockErr != nil {
|
||||
return unlockErr
|
||||
}
|
||||
return closeErr
|
||||
}, true, nil
|
||||
}
|
||||
@ -1,76 +0,0 @@
|
||||
//go:build windows
|
||||
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"golang.org/x/sys/windows"
|
||||
)
|
||||
|
||||
func acquireSyncLock(ctx context.Context, path string) (func() error, error) {
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("open sync lock: %w", err)
|
||||
}
|
||||
locked := false
|
||||
defer func() {
|
||||
if !locked {
|
||||
_ = file.Close()
|
||||
}
|
||||
}()
|
||||
ticker := time.NewTicker(200 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
handle := windows.Handle(file.Fd())
|
||||
overlapped := &windows.Overlapped{}
|
||||
for {
|
||||
err = windows.LockFileEx(handle, windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, 0, 1, 0, overlapped)
|
||||
if err == nil {
|
||||
locked = true
|
||||
_, _ = file.Seek(0, 0)
|
||||
_ = file.Truncate(0)
|
||||
_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
|
||||
return func() error {
|
||||
unlockErr := windows.UnlockFileEx(handle, 0, 1, 0, overlapped)
|
||||
closeErr := file.Close()
|
||||
if unlockErr != nil {
|
||||
return unlockErr
|
||||
}
|
||||
return closeErr
|
||||
}, nil
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, syncLockErr(ctx, path)
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func tryAcquireSyncLock(path string) (func() error, bool, error) {
|
||||
file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("open sync lock: %w", err)
|
||||
}
|
||||
handle := windows.Handle(file.Fd())
|
||||
overlapped := &windows.Overlapped{}
|
||||
err = windows.LockFileEx(handle, windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, 0, 1, 0, overlapped)
|
||||
if err != nil {
|
||||
_ = file.Close()
|
||||
return nil, false, nil
|
||||
}
|
||||
_, _ = file.Seek(0, 0)
|
||||
_ = file.Truncate(0)
|
||||
_, _ = fmt.Fprintf(file, "pid=%d\n", os.Getpid())
|
||||
return func() error {
|
||||
unlockErr := windows.UnlockFileEx(handle, 0, 1, 0, overlapped)
|
||||
closeErr := file.Close()
|
||||
if unlockErr != nil {
|
||||
return unlockErr
|
||||
}
|
||||
return closeErr
|
||||
}, true, nil
|
||||
}
|
||||
@ -1,239 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/openclaw/crawlkit/tui"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func (r *runtime) runTUI(args []string) error {
|
||||
fs := flag.NewFlagSet("tui", flag.ContinueOnError)
|
||||
fs.SetOutput(r.stderr)
|
||||
fs.Usage = func() {
|
||||
_, _ = fmt.Fprintln(fs.Output(), "Usage of tui:")
|
||||
fs.PrintDefaults()
|
||||
_, _ = fmt.Fprintln(fs.Output())
|
||||
_, _ = fmt.Fprintln(fs.Output(), tui.ControlsHelp())
|
||||
}
|
||||
if hasHelpArg(args) {
|
||||
fs.SetOutput(r.stdout)
|
||||
}
|
||||
channel := fs.String("channel", "", "channel id")
|
||||
author := fs.String("author", "", "author/user id")
|
||||
limit := fs.Int("limit", 200, "row limit")
|
||||
includeEmpty := fs.Bool("include-empty", false, "include empty messages")
|
||||
dm := fs.Bool("dm", false, "browse direct messages")
|
||||
guildsFlag := fs.String("guilds", "", "comma-separated guild ids")
|
||||
guildFlag := fs.String("guild", "", "guild id")
|
||||
jsonOut := fs.Bool("json", false, "write browser rows as JSON")
|
||||
if len(args) == 1 && args[0] == "help" {
|
||||
fs.Usage()
|
||||
return nil
|
||||
}
|
||||
if err := fs.Parse(args); err != nil {
|
||||
if errors.Is(err, flag.ErrHelp) {
|
||||
return nil
|
||||
}
|
||||
return usageErr(err)
|
||||
}
|
||||
if *jsonOut {
|
||||
r.json = true
|
||||
}
|
||||
if fs.NArg() != 0 {
|
||||
return usageErr(errors.New("tui takes flags only"))
|
||||
}
|
||||
if *limit <= 0 {
|
||||
return usageErr(errors.New("tui --limit must be positive"))
|
||||
}
|
||||
guildIDs, err := r.resolveTUIGuilds(*dm, *guildFlag, *guildsFlag)
|
||||
if err != nil {
|
||||
return usageErr(err)
|
||||
}
|
||||
if r.store == nil {
|
||||
return tui.Browse(r.ctx, tui.BrowseOptions{
|
||||
AppName: "discrawl",
|
||||
Title: "discrawl archive",
|
||||
EmptyMessage: "discrawl has no local messages yet",
|
||||
JSON: r.json,
|
||||
Layout: tui.LayoutChat,
|
||||
SourceKind: r.archiveSourceKind(),
|
||||
SourceLocation: r.archiveSourceLocation(),
|
||||
Stdout: r.stdout,
|
||||
})
|
||||
}
|
||||
loadRows := func() ([]tui.Row, error) {
|
||||
rows, err := r.store.ListMessagesWithThreadContext(r.ctx, store.MessageListOptions{
|
||||
GuildIDs: guildIDs,
|
||||
Channel: *channel,
|
||||
Author: *author,
|
||||
Last: *limit,
|
||||
IncludeEmpty: *includeEmpty,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return discordTUIRows(rows), nil
|
||||
}
|
||||
archiveRows, err := loadRows()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return tui.Browse(r.ctx, tui.BrowseOptions{
|
||||
AppName: "discrawl",
|
||||
Title: "discrawl archive",
|
||||
EmptyMessage: "discrawl has no local messages yet",
|
||||
Rows: archiveRows,
|
||||
Refresh: func(context.Context) ([]tui.Row, error) { return loadRows() },
|
||||
JSON: r.json,
|
||||
Layout: tui.LayoutChat,
|
||||
SourceKind: r.archiveSourceKind(),
|
||||
SourceLocation: r.archiveSourceLocation(),
|
||||
Stdout: r.stdout,
|
||||
})
|
||||
}
|
||||
|
||||
func (r *runtime) resolveTUIGuilds(dm bool, guild, guilds string) ([]string, error) {
|
||||
guildIDs, err := directMessageGuildScope(dm, guild, guilds)
|
||||
if err != nil || dm || len(guildIDs) > 0 {
|
||||
return guildIDs, err
|
||||
}
|
||||
if defaultGuild := r.cfg.EffectiveDefaultGuildID(); defaultGuild != "" {
|
||||
return []string{defaultGuild}, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *runtime) archiveSourceKind() string {
|
||||
if strings.TrimSpace(r.cfg.Share.Remote) != "" {
|
||||
return tui.SourceRemote
|
||||
}
|
||||
return tui.SourceLocal
|
||||
}
|
||||
|
||||
func (r *runtime) archiveSourceLocation() string {
|
||||
if strings.TrimSpace(r.cfg.Share.Remote) != "" {
|
||||
return r.cfg.Share.Remote
|
||||
}
|
||||
return r.cfg.DBPath
|
||||
}
|
||||
|
||||
func discordTUIRows(rows []store.MessageRow) []tui.Row {
|
||||
items := make([]tui.Row, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
content := discordDisplayContent(row)
|
||||
title := strings.TrimSpace(content)
|
||||
detail := discordDetailContent(row, content)
|
||||
if title == "" {
|
||||
title = firstNonEmpty(strings.TrimSpace(row.AttachmentText), row.MessageID)
|
||||
}
|
||||
tags := []string{row.GuildID, row.ChannelID}
|
||||
if row.GuildID == "@me" {
|
||||
tags = append(tags, "dm")
|
||||
}
|
||||
if row.Source != "" {
|
||||
tags = append(tags, row.Source)
|
||||
}
|
||||
items = append(items, tui.Row{
|
||||
Source: "discord",
|
||||
Kind: "message",
|
||||
ID: row.MessageID,
|
||||
ParentID: row.ReplyToMessage,
|
||||
Scope: discordScopeLabel(row),
|
||||
Container: discordContainerLabel(row),
|
||||
Author: discordAuthorLabel(row),
|
||||
Title: title,
|
||||
Text: content,
|
||||
Detail: detail,
|
||||
URL: discordMessageURL(row),
|
||||
CreatedAt: formatTime(row.CreatedAt),
|
||||
Tags: tags,
|
||||
Fields: map[string]string{
|
||||
"attachment_names": row.AttachmentNames,
|
||||
"attachments": boolString(row.HasAttachments),
|
||||
"author_id": row.AuthorID,
|
||||
"channel_id": row.ChannelID,
|
||||
"guild_id": row.GuildID,
|
||||
"pinned": boolString(row.Pinned),
|
||||
"reply_to": row.ReplyToMessage,
|
||||
"source": row.Source,
|
||||
},
|
||||
})
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
func discordDetailContent(row store.MessageRow, content string) string {
|
||||
var parts []string
|
||||
if strings.TrimSpace(content) != "" {
|
||||
parts = append(parts, strings.TrimSpace(content))
|
||||
}
|
||||
if strings.TrimSpace(row.AttachmentText) != "" {
|
||||
parts = append(parts, "Attachments\n"+strings.TrimSpace(row.AttachmentText))
|
||||
}
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
return strings.Join(parts, "\n\n")
|
||||
}
|
||||
|
||||
func discordDisplayContent(row store.MessageRow) string {
|
||||
if content := strings.TrimSpace(row.DisplayContent); content != "" {
|
||||
return content
|
||||
}
|
||||
return row.Content
|
||||
}
|
||||
|
||||
func discordMessageURL(row store.MessageRow) string {
|
||||
guildID := strings.TrimSpace(row.GuildID)
|
||||
channelID := strings.TrimSpace(row.ChannelID)
|
||||
messageID := strings.TrimSpace(row.MessageID)
|
||||
if guildID == "" || channelID == "" || messageID == "" {
|
||||
return ""
|
||||
}
|
||||
return "https://discord.com/channels/" + guildID + "/" + channelID + "/" + messageID
|
||||
}
|
||||
|
||||
func discordScopeLabel(row store.MessageRow) string {
|
||||
if row.GuildID == "@me" {
|
||||
return "Direct messages"
|
||||
}
|
||||
return firstNonEmpty(row.GuildName, row.GuildID)
|
||||
}
|
||||
|
||||
func discordContainerLabel(row store.MessageRow) string {
|
||||
if row.GuildID == "@me" {
|
||||
return firstNonEmpty(row.ChannelName, "DM "+compactDiscordID(row.ChannelID))
|
||||
}
|
||||
return firstNonEmpty(row.ChannelName, row.ChannelID)
|
||||
}
|
||||
|
||||
func discordAuthorLabel(row store.MessageRow) string {
|
||||
if name := strings.TrimSpace(row.AuthorName); name != "" {
|
||||
return name
|
||||
}
|
||||
if id := strings.TrimSpace(row.AuthorID); id != "" {
|
||||
return "user:" + compactDiscordID(id)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// compactDiscordID trims id and, when it is longer than 10 bytes, shortens it
// to its first 6 and last 4 bytes joined by "...". Shorter ids are returned
// trimmed but otherwise unchanged.
func compactDiscordID(id string) string {
	const (
		maxPlain = 10
		headLen  = 6
		tailLen  = 4
	)
	trimmed := strings.TrimSpace(id)
	if n := len(trimmed); n > maxPlain {
		return trimmed[:headLen] + "..." + trimmed[n-tailLen:]
	}
	return trimmed
}
|
||||
|
||||
// boolString renders a flag for display: "true" when set and the empty string
// (not "false") when unset.
func boolString(value bool) string {
	label := ""
	if value {
		label = "true"
	}
	return label
}
|
||||
@ -1,3 +1,3 @@
|
||||
package cli
|
||||
|
||||
var version = "0.7.0"
|
||||
var version = "0.4.1"
|
||||
|
||||
@ -1,22 +1,22 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
crawlconfig "github.com/openclaw/crawlkit/config"
|
||||
"github.com/pelletier/go-toml/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultConfigEnv = "DISCRAWL_CONFIG"
|
||||
DefaultTokenEnv = "DISCORD_BOT_TOKEN"
|
||||
DefaultTokenKeyringService = "discrawl"
|
||||
DefaultTokenKeyringAccount = "discord_bot_token"
|
||||
DefaultConfigEnv = "DISCRAWL_CONFIG"
|
||||
DefaultTokenEnv = "DISCORD_BOT_TOKEN"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
@ -35,16 +35,15 @@ type Config struct {
|
||||
}
|
||||
|
||||
type DiscordConfig struct {
|
||||
TokenSource string `toml:"token_source"`
|
||||
TokenEnv string `toml:"token_env"`
|
||||
TokenKeyringService string `toml:"token_keyring_service"`
|
||||
TokenKeyringAccount string `toml:"token_keyring_account"`
|
||||
TokenSource string `toml:"token_source"`
|
||||
OpenClawConfig string `toml:"openclaw_config"`
|
||||
Account string `toml:"account"`
|
||||
TokenEnv string `toml:"token_env"`
|
||||
}
|
||||
|
||||
type DesktopConfig struct {
|
||||
Path string `toml:"path"`
|
||||
MaxFileBytes int64 `toml:"max_file_bytes"`
|
||||
FullCache bool `toml:"full_cache"`
|
||||
}
|
||||
|
||||
type SyncConfig struct {
|
||||
@ -85,31 +84,43 @@ type TokenResolution struct {
|
||||
Path string
|
||||
}
|
||||
|
||||
var appConfig = crawlconfig.App{Name: "discrawl", ConfigEnv: DefaultConfigEnv, BaseDir: "~/.discrawl", LegacyBaseDir: "~/.discrawl"}
|
||||
// OpenClawDiscord is the Discord information extracted from an OpenClaw
// config file: the bot token, the ids of the guilds listed there, and the
// path of the file it was read from.
type OpenClawDiscord struct {
	Token    string
	GuildIDs []string
	Path     string
}
|
||||
|
||||
type openClawConfig struct {
|
||||
Channels struct {
|
||||
Discord openClawDiscord `json:"discord"`
|
||||
} `json:"channels"`
|
||||
}
|
||||
|
||||
type openClawDiscord struct {
|
||||
Token string `json:"token"`
|
||||
Accounts map[string]openClawDiscordAcct `json:"accounts"`
|
||||
Guilds map[string]json.RawMessage `json:"guilds"`
|
||||
}
|
||||
|
||||
// openClawDiscordAcct is the JSON shape of one entry under "accounts": the
// account's token and a guild map keyed by guild id whose values are kept
// undecoded.
type openClawDiscordAcct struct {
	Token  string                     `json:"token"`
	Guilds map[string]json.RawMessage `json:"guilds"`
}
|
||||
|
||||
func Default() Config {
|
||||
home, _ := os.UserHomeDir()
|
||||
paths, err := appConfig.DefaultPaths()
|
||||
if err != nil {
|
||||
base := filepath.Join(home, ".discrawl")
|
||||
paths = crawlconfig.Paths{
|
||||
DBPath: filepath.Join(base, "discrawl.db"),
|
||||
CacheDir: filepath.Join(base, "cache"),
|
||||
LogDir: filepath.Join(base, "logs"),
|
||||
ShareDir: filepath.Join(base, "share"),
|
||||
}
|
||||
}
|
||||
base := filepath.Join(home, ".discrawl")
|
||||
return Config{
|
||||
Version: 1,
|
||||
DBPath: paths.DBPath,
|
||||
CacheDir: paths.CacheDir,
|
||||
LogDir: paths.LogDir,
|
||||
DBPath: filepath.Join(base, "discrawl.db"),
|
||||
CacheDir: filepath.Join(base, "cache"),
|
||||
LogDir: filepath.Join(base, "logs"),
|
||||
DefaultGuildID: "",
|
||||
Discord: DiscordConfig{
|
||||
TokenSource: "env",
|
||||
TokenEnv: DefaultTokenEnv,
|
||||
TokenKeyringService: DefaultTokenKeyringService,
|
||||
TokenKeyringAccount: DefaultTokenKeyringAccount,
|
||||
TokenSource: "openclaw",
|
||||
OpenClawConfig: filepath.Join(home, ".openclaw", "openclaw.json"),
|
||||
Account: "default",
|
||||
TokenEnv: DefaultTokenEnv,
|
||||
},
|
||||
Desktop: DesktopConfig{
|
||||
Path: defaultDiscordDesktopPath(home),
|
||||
@ -120,7 +131,7 @@ func Default() Config {
|
||||
Concurrency: defaultSyncConcurrency(),
|
||||
RepairEvery: "6h",
|
||||
FullHistory: true,
|
||||
AttachmentText: new(true),
|
||||
AttachmentText: boolPtr(true),
|
||||
},
|
||||
Search: SearchConfig{
|
||||
DefaultMode: "fts",
|
||||
@ -135,7 +146,7 @@ func Default() Config {
|
||||
},
|
||||
},
|
||||
Share: ShareConfig{
|
||||
RepoPath: paths.ShareDir,
|
||||
RepoPath: filepath.Join(base, "share"),
|
||||
Branch: "main",
|
||||
AutoUpdate: true,
|
||||
StaleAfter: "15m",
|
||||
@ -156,12 +167,14 @@ func defaultSyncConcurrency() int {
|
||||
}
|
||||
|
||||
func ResolvePath(flagPath string) string {
|
||||
path, err := appConfig.ResolveConfigPath(flagPath)
|
||||
if err != nil {
|
||||
home, _ := os.UserHomeDir()
|
||||
return filepath.Join(home, ".discrawl", "config.toml")
|
||||
if strings.TrimSpace(flagPath) != "" {
|
||||
return flagPath
|
||||
}
|
||||
return path
|
||||
if envPath := strings.TrimSpace(os.Getenv(DefaultConfigEnv)); envPath != "" {
|
||||
return envPath
|
||||
}
|
||||
home, _ := os.UserHomeDir()
|
||||
return filepath.Join(home, ".discrawl", "config.toml")
|
||||
}
|
||||
|
||||
func Load(path string) (Config, error) {
|
||||
@ -170,9 +183,13 @@ func Load(path string) (Config, error) {
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if err := crawlconfig.LoadTOML(expanded, &cfg); err != nil {
|
||||
data, err := os.ReadFile(expanded)
|
||||
if err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
if err := toml.Unmarshal(data, &cfg); err != nil {
|
||||
return Config{}, fmt.Errorf("parse config: %w", err)
|
||||
}
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return Config{}, err
|
||||
}
|
||||
@ -187,7 +204,14 @@ func Write(path string, cfg Config) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return crawlconfig.WriteTOML(expanded, cfg, 0o600)
|
||||
if err := os.MkdirAll(filepath.Dir(expanded), 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir config dir: %w", err)
|
||||
}
|
||||
data, err := toml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal config: %w", err)
|
||||
}
|
||||
return os.WriteFile(expanded, data, 0o600)
|
||||
}
|
||||
|
||||
func (c *Config) Normalize() error {
|
||||
@ -209,22 +233,18 @@ func (c *Config) Normalize() error {
|
||||
c.LogDir = def.LogDir
|
||||
}
|
||||
}
|
||||
c.Discord.TokenSource = strings.ToLower(strings.TrimSpace(c.Discord.TokenSource))
|
||||
c.Discord.TokenEnv = strings.TrimSpace(c.Discord.TokenEnv)
|
||||
c.Discord.TokenKeyringService = strings.TrimSpace(c.Discord.TokenKeyringService)
|
||||
c.Discord.TokenKeyringAccount = strings.TrimSpace(c.Discord.TokenKeyringAccount)
|
||||
if c.Discord.TokenSource == "" {
|
||||
c.Discord.TokenSource = "env"
|
||||
c.Discord.TokenSource = "openclaw"
|
||||
}
|
||||
if c.Discord.OpenClawConfig == "" {
|
||||
c.Discord.OpenClawConfig = Default().Discord.OpenClawConfig
|
||||
}
|
||||
if c.Discord.Account == "" {
|
||||
c.Discord.Account = "default"
|
||||
}
|
||||
if c.Discord.TokenEnv == "" {
|
||||
c.Discord.TokenEnv = DefaultTokenEnv
|
||||
}
|
||||
if c.Discord.TokenKeyringService == "" {
|
||||
c.Discord.TokenKeyringService = DefaultTokenKeyringService
|
||||
}
|
||||
if c.Discord.TokenKeyringAccount == "" {
|
||||
c.Discord.TokenKeyringAccount = DefaultTokenKeyringAccount
|
||||
}
|
||||
if c.Desktop.Path == "" {
|
||||
c.Desktop.Path = defaultDiscordDesktopPath(homeDir())
|
||||
}
|
||||
@ -242,7 +262,7 @@ func (c *Config) Normalize() error {
|
||||
c.Sync.RepairEvery = "6h"
|
||||
}
|
||||
if c.Sync.AttachmentText == nil {
|
||||
c.Sync.AttachmentText = new(true)
|
||||
c.Sync.AttachmentText = boolPtr(true)
|
||||
}
|
||||
if c.Search.DefaultMode == "" {
|
||||
c.Search.DefaultMode = "fts"
|
||||
@ -341,18 +361,153 @@ func (c Config) ShareEnabled() bool {
|
||||
}
|
||||
|
||||
func EnsureRuntimeDirs(cfg Config) error {
|
||||
return crawlconfig.EnsureRuntimeDirs(crawlconfig.RuntimeConfig{
|
||||
DBPath: cfg.DBPath,
|
||||
CacheDir: cfg.CacheDir,
|
||||
LogDir: cfg.LogDir,
|
||||
})
|
||||
paths := []string{cfg.CacheDir, cfg.LogDir, filepath.Dir(cfg.DBPath)}
|
||||
for _, path := range paths {
|
||||
expanded, err := ExpandPath(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(expanded, 0o755); err != nil {
|
||||
return fmt.Errorf("mkdir %s: %w", expanded, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ExpandPath(path string) (string, error) {
|
||||
if strings.TrimSpace(path) == "" {
|
||||
return "", errors.New("empty path")
|
||||
}
|
||||
return filepath.Clean(os.ExpandEnv(crawlconfig.ExpandHome(path))), nil
|
||||
if strings.HasPrefix(path, "~/") || path == "~" {
|
||||
home, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("home dir: %w", err)
|
||||
}
|
||||
if path == "~" {
|
||||
path = home
|
||||
} else {
|
||||
path = filepath.Join(home, strings.TrimPrefix(path, "~/"))
|
||||
}
|
||||
}
|
||||
return filepath.Clean(os.ExpandEnv(path)), nil
|
||||
}
|
||||
|
||||
func ResolveDiscordToken(cfg Config) (TokenResolution, error) {
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return TokenResolution{}, err
|
||||
}
|
||||
if cfg.Discord.TokenSource == "none" {
|
||||
return TokenResolution{}, errors.New("discord token disabled by config")
|
||||
}
|
||||
if cfg.Discord.TokenSource != "env" {
|
||||
openClaw, err := LoadOpenClawDiscord(cfg.Discord.OpenClawConfig, cfg.Discord.Account)
|
||||
if err == nil && openClaw.Token != "" {
|
||||
return TokenResolution{Token: openClaw.Token, Source: "openclaw", Path: openClaw.Path}, nil
|
||||
}
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return TokenResolution{}, err
|
||||
}
|
||||
}
|
||||
if envToken := NormalizeBotToken(os.Getenv(cfg.Discord.TokenEnv)); envToken != "" {
|
||||
return TokenResolution{Token: envToken, Source: "env", Path: cfg.Discord.TokenEnv}, nil
|
||||
}
|
||||
return TokenResolution{}, errors.New("discord token not found in env or openclaw config")
|
||||
}
|
||||
|
||||
func LoadOpenClawDiscord(path, account string) (OpenClawDiscord, error) {
|
||||
paths, err := openClawCandidates(path)
|
||||
if err != nil {
|
||||
return OpenClawDiscord{}, err
|
||||
}
|
||||
for _, candidate := range paths {
|
||||
info, err := loadOpenClawDiscordFile(candidate, account)
|
||||
if err == nil && info.Token != "" {
|
||||
return info, nil
|
||||
}
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return OpenClawDiscord{}, err
|
||||
}
|
||||
}
|
||||
return OpenClawDiscord{}, os.ErrNotExist
|
||||
}
|
||||
|
||||
func loadOpenClawDiscordFile(path, account string) (OpenClawDiscord, error) {
|
||||
expanded, err := ExpandPath(path)
|
||||
if err != nil {
|
||||
return OpenClawDiscord{}, err
|
||||
}
|
||||
data, err := os.ReadFile(expanded)
|
||||
if err != nil {
|
||||
return OpenClawDiscord{}, err
|
||||
}
|
||||
var payload openClawConfig
|
||||
if err := json.Unmarshal(data, &payload); err != nil {
|
||||
return OpenClawDiscord{}, fmt.Errorf("parse openclaw config: %w", err)
|
||||
}
|
||||
discord := payload.Channels.Discord
|
||||
token := expandOpenClawToken(discord.Token)
|
||||
guildIDs := mapKeys(discord.Guilds)
|
||||
if token == "" {
|
||||
acct := discord.Accounts[normalizeAccount(account)]
|
||||
if acct.Token == "" && account != normalizeAccount(account) {
|
||||
acct = discord.Accounts[account]
|
||||
}
|
||||
token = expandOpenClawToken(acct.Token)
|
||||
if len(guildIDs) == 0 {
|
||||
guildIDs = mapKeys(acct.Guilds)
|
||||
}
|
||||
}
|
||||
return OpenClawDiscord{
|
||||
Token: token,
|
||||
GuildIDs: guildIDs,
|
||||
Path: expanded,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func openClawCandidates(path string) ([]string, error) {
|
||||
expanded, err := ExpandPath(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
candidates := []string{expanded}
|
||||
matches, err := filepath.Glob(expanded + ".bak*")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sort.Strings(matches)
|
||||
candidates = append(candidates, matches...)
|
||||
return uniqueStrings(candidates), nil
|
||||
}
|
||||
|
||||
// NormalizeBotToken strips surrounding whitespace and a leading,
// case-sensitive "Bot " prefix from raw, returning the bare token.
func NormalizeBotToken(raw string) string {
	trimmed := strings.TrimSpace(raw)
	withoutScheme := strings.TrimPrefix(trimmed, "Bot ")
	return strings.TrimSpace(withoutScheme)
}
|
||||
|
||||
func expandOpenClawToken(raw string) string {
|
||||
return NormalizeBotToken(os.ExpandEnv(raw))
|
||||
}
|
||||
|
||||
// normalizeAccount lower-cases and trims an account name, substituting
// "default" when nothing is left.
func normalizeAccount(account string) string {
	if name := strings.TrimSpace(strings.ToLower(account)); name != "" {
		return name
	}
	return "default"
}
|
||||
|
||||
// boolPtr returns a pointer to a fresh bool holding value.
func boolPtr(value bool) *bool {
	ptr := new(bool)
	*ptr = value
	return ptr
}
|
||||
|
||||
// mapKeys returns the keys of m in ascending order. The result is always
// non-nil, even for a nil or empty map.
func mapKeys[V any](m map[string]V) []string {
	sorted := make([]string, len(m))
	next := 0
	for name := range m {
		sorted[next] = name
		next++
	}
	sort.Strings(sorted)
	return sorted
}
|
||||
|
||||
func uniqueStrings(in []string) []string {
|
||||
|
||||
@ -7,7 +7,6 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/zalando/go-keyring"
|
||||
)
|
||||
|
||||
func TestNormalizeFillsDefaults(t *testing.T) {
|
||||
@ -16,10 +15,9 @@ func TestNormalizeFillsDefaults(t *testing.T) {
|
||||
cfg := Config{}
|
||||
require.NoError(t, cfg.Normalize())
|
||||
require.Equal(t, 1, cfg.Version)
|
||||
require.Equal(t, "env", cfg.Discord.TokenSource)
|
||||
require.Equal(t, "openclaw", cfg.Discord.TokenSource)
|
||||
require.Equal(t, "default", cfg.Discord.Account)
|
||||
require.Equal(t, DefaultTokenEnv, cfg.Discord.TokenEnv)
|
||||
require.Equal(t, DefaultTokenKeyringService, cfg.Discord.TokenKeyringService)
|
||||
require.Equal(t, DefaultTokenKeyringAccount, cfg.Discord.TokenKeyringAccount)
|
||||
require.Equal(t, defaultSyncConcurrency(), cfg.Sync.Concurrency)
|
||||
require.GreaterOrEqual(t, cfg.Sync.Concurrency, 8)
|
||||
require.LessOrEqual(t, cfg.Sync.Concurrency, 32)
|
||||
@ -55,8 +53,32 @@ func TestDefaultSyncConcurrencyBounds(t *testing.T) {
|
||||
require.Equal(t, 32, defaultSyncConcurrency())
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenFromEnv(t *testing.T) {
|
||||
func TestResolveDiscordTokenPrefersOpenClaw(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
openClawPath := filepath.Join(dir, "openclaw.json")
|
||||
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
|
||||
"channels": {
|
||||
"discord": {
|
||||
"token": "Bot config-token",
|
||||
"guilds": { "g1": {}, "g2": {} }
|
||||
}
|
||||
}
|
||||
}`), 0o600))
|
||||
t.Setenv(DefaultTokenEnv, "env-token")
|
||||
|
||||
cfg := Default()
|
||||
cfg.Discord.OpenClawConfig = openClawPath
|
||||
|
||||
token, err := ResolveDiscordToken(cfg)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "config-token", token.Token)
|
||||
require.Equal(t, "openclaw", token.Source)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenFallsBackToEnv(t *testing.T) {
|
||||
cfg := Default()
|
||||
cfg.Discord.TokenSource = "env"
|
||||
cfg.Discord.OpenClawConfig = filepath.Join(t.TempDir(), "missing.json")
|
||||
t.Setenv(DefaultTokenEnv, "Bot env-token")
|
||||
|
||||
token, err := ResolveDiscordToken(cfg)
|
||||
@ -65,64 +87,6 @@ func TestResolveDiscordTokenFromEnv(t *testing.T) {
|
||||
require.Equal(t, "env", token.Source)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenFallsBackToKeyring(t *testing.T) {
|
||||
cfg := Default()
|
||||
t.Setenv(DefaultTokenEnv, "")
|
||||
stubDiscordTokenKeyring(t, func(service, account string) (string, error) {
|
||||
require.Equal(t, DefaultTokenKeyringService, service)
|
||||
require.Equal(t, DefaultTokenKeyringAccount, account)
|
||||
return "Bot keyring-token", nil
|
||||
})
|
||||
|
||||
token, err := ResolveDiscordToken(cfg)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "keyring-token", token.Token)
|
||||
require.Equal(t, "keyring", token.Source)
|
||||
require.Equal(t, "discrawl/discord_bot_token", token.Path)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenFromKeyringSource(t *testing.T) {
|
||||
cfg := Default()
|
||||
cfg.Discord.TokenSource = "keyring"
|
||||
cfg.Discord.TokenKeyringService = " custom-service "
|
||||
cfg.Discord.TokenKeyringAccount = " custom-account "
|
||||
t.Setenv(DefaultTokenEnv, "ignored-env-token")
|
||||
stubDiscordTokenKeyring(t, func(service, account string) (string, error) {
|
||||
require.Equal(t, "custom-service", service)
|
||||
require.Equal(t, "custom-account", account)
|
||||
return "custom-keyring-token", nil
|
||||
})
|
||||
|
||||
token, err := ResolveDiscordToken(cfg)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "custom-keyring-token", token.Token)
|
||||
require.Equal(t, "keyring", token.Source)
|
||||
require.Equal(t, "custom-service/custom-account", token.Path)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenFromCustomEnv(t *testing.T) {
|
||||
cfg := Default()
|
||||
cfg.Discord.TokenEnv = "DISCRAWL_TEST_DISCORD_TOKEN"
|
||||
t.Setenv("DISCRAWL_TEST_DISCORD_TOKEN", "custom-env-token")
|
||||
|
||||
token, err := ResolveDiscordToken(cfg)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "custom-env-token", token.Token)
|
||||
require.Equal(t, "DISCRAWL_TEST_DISCORD_TOKEN", token.Path)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenRequiresEnvValue(t *testing.T) {
|
||||
cfg := Default()
|
||||
t.Setenv(DefaultTokenEnv, "")
|
||||
stubDiscordTokenKeyring(t, func(_, _ string) (string, error) {
|
||||
return "", keyring.ErrNotFound
|
||||
})
|
||||
|
||||
_, err := ResolveDiscordToken(cfg)
|
||||
require.ErrorContains(t, err, `discord token not found in environment variable "DISCORD_BOT_TOKEN" or keyring item "discrawl"/"discord_bot_token"`)
|
||||
require.ErrorIs(t, err, keyring.ErrNotFound)
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenDisabled(t *testing.T) {
|
||||
cfg := Default()
|
||||
cfg.Discord.TokenSource = "none"
|
||||
@ -132,12 +96,51 @@ func TestResolveDiscordTokenDisabled(t *testing.T) {
|
||||
require.ErrorContains(t, err, "discord token disabled")
|
||||
}
|
||||
|
||||
func TestResolveDiscordTokenRejectsUnsupportedSource(t *testing.T) {
|
||||
cfg := Default()
|
||||
cfg.Discord.TokenSource = "legacy"
|
||||
func TestLoadOpenClawDiscordFromAccount(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
_, err := ResolveDiscordToken(cfg)
|
||||
require.ErrorContains(t, err, `unsupported discord token_source "legacy"`)
|
||||
dir := t.TempDir()
|
||||
openClawPath := filepath.Join(dir, "openclaw.json")
|
||||
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
|
||||
"channels": {
|
||||
"discord": {
|
||||
"accounts": {
|
||||
"default": {
|
||||
"token": "acct-token",
|
||||
"guilds": { "g3": {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`), 0o600))
|
||||
|
||||
info, err := LoadOpenClawDiscord(openClawPath, "default")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "acct-token", info.Token)
|
||||
require.Equal(t, []string{"g3"}, info.GuildIDs)
|
||||
}
|
||||
|
||||
func TestLoadOpenClawDiscordExpandsEnvToken(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
openClawPath := filepath.Join(dir, "openclaw.json")
|
||||
t.Setenv("DISCRAWL_TEST_TOKEN", "Bot env-expanded-token")
|
||||
require.NoError(t, os.WriteFile(openClawPath, []byte(`{
|
||||
"channels": {
|
||||
"discord": {
|
||||
"accounts": {
|
||||
"default": {
|
||||
"token": "${DISCRAWL_TEST_TOKEN}",
|
||||
"guilds": { "g3": {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`), 0o600))
|
||||
|
||||
info, err := LoadOpenClawDiscord(openClawPath, "default")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "env-expanded-token", info.Token)
|
||||
require.Equal(t, []string{"g3"}, info.GuildIDs)
|
||||
}
|
||||
|
||||
func TestWriteAndLoadRoundTrip(t *testing.T) {
|
||||
@ -246,7 +249,7 @@ func TestAttachmentTextExplicitFalseSurvivesNormalize(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := Default()
|
||||
cfg.Sync.AttachmentText = new(false)
|
||||
cfg.Sync.AttachmentText = boolPtr(false)
|
||||
require.NoError(t, cfg.Normalize())
|
||||
require.False(t, cfg.AttachmentTextEnabled())
|
||||
}
|
||||
@ -259,7 +262,7 @@ func TestExpandPath(t *testing.T) {
|
||||
require.Contains(t, path, "discrawl-test")
|
||||
}
|
||||
|
||||
func TestResolvePath(t *testing.T) {
|
||||
func TestResolvePathAndLoadOpenClawFallbacks(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
envPath := filepath.Join(dir, "env.toml")
|
||||
t.Setenv(DefaultConfigEnv, envPath)
|
||||
@ -269,6 +272,43 @@ func TestResolvePath(t *testing.T) {
|
||||
require.Contains(t, ResolvePath(""), filepath.Join(".discrawl", "config.toml"))
|
||||
_, err := ExpandPath("")
|
||||
require.ErrorContains(t, err, "empty path")
|
||||
|
||||
openClawPath := filepath.Join(dir, "openclaw.json")
|
||||
require.NoError(t, os.WriteFile(openClawPath, []byte(`{}`), 0o600))
|
||||
require.NoError(t, os.WriteFile(openClawPath+".bak", []byte(`{
|
||||
"channels": {
|
||||
"discord": {
|
||||
"accounts": {
|
||||
"Work Account": {
|
||||
"token": "backup-token",
|
||||
"guilds": { "g9": {} }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`), 0o600))
|
||||
|
||||
info, err := LoadOpenClawDiscord(openClawPath, "Work Account")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "backup-token", info.Token)
|
||||
require.Equal(t, []string{"g9"}, info.GuildIDs)
|
||||
|
||||
_, err = LoadOpenClawDiscord(filepath.Join(dir, "missing.json"), "default")
|
||||
require.ErrorIs(t, err, os.ErrNotExist)
|
||||
}
|
||||
|
||||
func TestOpenClawCandidatesIncludesBackups(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
dir := t.TempDir()
|
||||
base := filepath.Join(dir, "openclaw.json")
|
||||
require.NoError(t, os.WriteFile(base, []byte(`{}`), 0o600))
|
||||
require.NoError(t, os.WriteFile(base+".bak", []byte(`{}`), 0o600))
|
||||
require.NoError(t, os.WriteFile(base+".bak.1", []byte(`{}`), 0o600))
|
||||
|
||||
paths, err := openClawCandidates(base)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, paths, 3)
|
||||
}
|
||||
|
||||
func TestEffectiveDefaultGuildAndDirs(t *testing.T) {
|
||||
@ -276,9 +316,12 @@ func TestEffectiveDefaultGuildAndDirs(t *testing.T) {
|
||||
|
||||
require.Equal(t, "explicit", Config{DefaultGuildID: "explicit", GuildIDs: []string{"g1"}}.EffectiveDefaultGuildID())
|
||||
require.Empty(t, Config{GuildIDs: []string{"g1", "g2"}}.EffectiveDefaultGuildID())
|
||||
require.Equal(t, "default", normalizeAccount(""))
|
||||
require.Equal(t, "work", normalizeAccount(" Work "))
|
||||
require.Equal(t, []string{"a", "b"}, uniqueStrings([]string{" a ", "", "b", "a"}))
|
||||
require.Equal(t, "token", NormalizeBotToken(" token "))
|
||||
require.Nil(t, uniqueStrings(nil))
|
||||
require.Equal(t, []string{"a", "b"}, mapKeys(map[string]int{"b": 2, "a": 1}))
|
||||
|
||||
cfg := Default()
|
||||
cfg.GuildIDs = []string{"g1"}
|
||||
@ -300,9 +343,6 @@ func TestResolvePathUsesEnv(t *testing.T) {
|
||||
func TestConfigErrorsAndBackupFallback(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
t.Setenv(DefaultTokenEnv, "")
|
||||
stubDiscordTokenKeyring(t, func(_, _ string) (string, error) {
|
||||
return "", keyring.ErrNotFound
|
||||
})
|
||||
|
||||
_, err := ExpandPath("")
|
||||
require.Error(t, err)
|
||||
@ -313,15 +353,15 @@ func TestConfigErrorsAndBackupFallback(t *testing.T) {
|
||||
require.Error(t, err)
|
||||
|
||||
cfg := Default()
|
||||
cfg.Discord.OpenClawConfig = filepath.Join(dir, "missing.json")
|
||||
_, err = ResolveDiscordToken(cfg)
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func stubDiscordTokenKeyring(t *testing.T, get func(service, account string) (string, error)) {
|
||||
t.Helper()
|
||||
old := discordTokenKeyringGet
|
||||
discordTokenKeyringGet = get
|
||||
t.Cleanup(func() {
|
||||
discordTokenKeyringGet = old
|
||||
})
|
||||
base := filepath.Join(dir, "openclaw.json")
|
||||
backup := base + ".bak"
|
||||
require.NoError(t, os.WriteFile(base, []byte(`{}`), 0o600))
|
||||
require.NoError(t, os.WriteFile(backup, []byte(`{"channels":{"discord":{"token":"backup-token"}}}`), 0o600))
|
||||
info, err := LoadOpenClawDiscord(base, "default")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "backup-token", info.Token)
|
||||
}
|
||||
|
||||
@ -1,64 +0,0 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/zalando/go-keyring"
|
||||
)
|
||||
|
||||
var discordTokenKeyringGet = keyring.Get
|
||||
|
||||
func ResolveDiscordToken(cfg Config) (TokenResolution, error) {
|
||||
if err := cfg.Normalize(); err != nil {
|
||||
return TokenResolution{}, err
|
||||
}
|
||||
switch cfg.Discord.TokenSource {
|
||||
case "none":
|
||||
return TokenResolution{}, errors.New("discord token disabled by config")
|
||||
case "env":
|
||||
envToken := NormalizeBotToken(os.Getenv(cfg.Discord.TokenEnv))
|
||||
if envToken != "" {
|
||||
return TokenResolution{Token: envToken, Source: "env", Path: cfg.Discord.TokenEnv}, nil
|
||||
}
|
||||
token, err := resolveDiscordTokenFromKeyring(cfg)
|
||||
if err == nil {
|
||||
return token, nil
|
||||
}
|
||||
return TokenResolution{}, fmt.Errorf(
|
||||
"discord token not found in environment variable %q or keyring item %q/%q: %w",
|
||||
cfg.Discord.TokenEnv,
|
||||
cfg.Discord.TokenKeyringService,
|
||||
cfg.Discord.TokenKeyringAccount,
|
||||
err,
|
||||
)
|
||||
case "keyring":
|
||||
return resolveDiscordTokenFromKeyring(cfg)
|
||||
default:
|
||||
return TokenResolution{}, fmt.Errorf("unsupported discord token_source %q", cfg.Discord.TokenSource)
|
||||
}
|
||||
}
|
||||
|
||||
func resolveDiscordTokenFromKeyring(cfg Config) (TokenResolution, error) {
|
||||
raw, err := discordTokenKeyringGet(cfg.Discord.TokenKeyringService, cfg.Discord.TokenKeyringAccount)
|
||||
if err != nil {
|
||||
return TokenResolution{}, err
|
||||
}
|
||||
token := NormalizeBotToken(raw)
|
||||
if token == "" {
|
||||
return TokenResolution{}, errors.New("keyring item is empty")
|
||||
}
|
||||
return TokenResolution{
|
||||
Token: token,
|
||||
Source: "keyring",
|
||||
Path: cfg.Discord.TokenKeyringService + "/" + cfg.Discord.TokenKeyringAccount,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// NormalizeBotToken trims surrounding whitespace from raw and strips a leading
// "Bot" authorization scheme, returning the bare token.
//
// The scheme is matched case-insensitively and may be separated from the token
// by a space or a tab, so "Bot x", "bot x" and "BOT\tx" all yield "x". A value
// that merely starts with the letters "bot" (for example "Botany"), or that is
// exactly "Bot", is returned unchanged apart from trimming.
func NormalizeBotToken(raw string) string {
	const scheme = "Bot"

	raw = strings.TrimSpace(raw)
	if len(raw) > len(scheme) && strings.EqualFold(raw[:len(scheme)], scheme) {
		// Only strip when a separator follows, so tokens that happen to begin
		// with "bot" are left intact.
		if sep := raw[len(scheme)]; sep == ' ' || sep == '\t' {
			raw = raw[len(scheme)+1:]
		}
	}
	return strings.TrimSpace(raw)
}
|
||||
@ -2,7 +2,6 @@ package discord
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"slices"
|
||||
@ -180,7 +179,7 @@ func (c *Client) ChannelMessage(ctx context.Context, channelID, messageID string
|
||||
|
||||
func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
|
||||
if handler == nil {
|
||||
return errors.New("missing event handler")
|
||||
return fmt.Errorf("missing event handler")
|
||||
}
|
||||
tailCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
@ -188,8 +187,10 @@ func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
|
||||
errCh := make(chan error, 1)
|
||||
workCh := make(chan func(context.Context) error, c.tailQueueSize)
|
||||
var wg sync.WaitGroup
|
||||
for range c.tailWorkerCount {
|
||||
wg.Go(func() {
|
||||
for i := 0; i < c.tailWorkerCount; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for {
|
||||
select {
|
||||
case <-tailCtx.Done():
|
||||
@ -206,7 +207,7 @@ func (c *Client) Tail(ctx context.Context, handler EventHandler) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}()
|
||||
}
|
||||
|
||||
c.session.AddHandler(func(_ *discordgo.Session, evt *discordgo.MessageCreate) {
|
||||
@ -298,7 +299,7 @@ func (c *Client) enqueueTailTask(
|
||||
case workCh <- task:
|
||||
default:
|
||||
select {
|
||||
case errCh <- errors.New("tail worker queue full"):
|
||||
case errCh <- fmt.Errorf("tail worker queue full"):
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
@ -400,7 +400,7 @@ func TestTailFailsFastWhenWorkerQueueFills(t *testing.T) {
|
||||
return
|
||||
}
|
||||
now := time.Now().UTC().Format(time.RFC3339)
|
||||
for i := range 4 {
|
||||
for i := 0; i < 4; i++ {
|
||||
if err := conn.WriteJSON(map[string]any{
|
||||
"op": 0,
|
||||
"t": "MESSAGE_CREATE",
|
||||
|
||||
@ -1,198 +0,0 @@
|
||||
package discorddesktop
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// userLabel is a display name recorded for a user ID together with a rank
// describing which source field it came from.
type userLabel struct {
	// Name is the display text; an empty Name is treated as "no label".
	Name string
	// Priority ranks the source of Name; larger values replace smaller ones.
	Priority int
}
|
||||
|
||||
func collectUserLabel(snap snapshot, raw map[string]any) {
|
||||
id := stringField(raw, "id")
|
||||
if !looksSnowflake(id) || !looksUserObject(raw) {
|
||||
return
|
||||
}
|
||||
name, priority := userObjectLabel(raw)
|
||||
if name == "" {
|
||||
return
|
||||
}
|
||||
if existing, ok := snap.userLabels[id]; !ok || priority > existing.Priority || existing.Name == "" {
|
||||
snap.userLabels[id] = userLabel{Name: name, Priority: priority}
|
||||
}
|
||||
}
|
||||
|
||||
// looksUserObject reports whether raw carries at least one key that is
// characteristic of a user object. Only key presence matters; the associated
// values are not inspected.
func looksUserObject(raw map[string]any) bool {
	for key := range raw {
		switch key {
		case "username", "global_name", "display_name", "discriminator", "avatar", "bot", "public_flags":
			return true
		}
	}
	return false
}
|
||||
|
||||
func userObjectLabel(raw map[string]any) (string, int) {
|
||||
if name := stringField(raw, "global_name"); name != "" {
|
||||
return name, 3
|
||||
}
|
||||
if name := stringField(raw, "display_name"); name != "" {
|
||||
return name, 2
|
||||
}
|
||||
if name := stringField(raw, "username"); name != "" {
|
||||
return name, 1
|
||||
}
|
||||
return "", 0
|
||||
}
|
||||
|
||||
func inferDirectMessageNames(snap snapshot) {
|
||||
authorChannels := map[string]map[string]struct{}{}
|
||||
channelAuthors := map[string]map[string]int{}
|
||||
for id, msg := range snap.messages {
|
||||
if label, ok := snap.userLabels[msg.Record.AuthorID]; ok && shouldUseUserLabel(msg.Record.AuthorName, label) {
|
||||
msg.Record.AuthorName = label.Name
|
||||
msg.Record.RawJSON = withRawAuthorLabel(msg.Record.RawJSON, msg.Record.AuthorID, label)
|
||||
msg.PayloadJSON = withRawAuthorLabel(msg.PayloadJSON, msg.Record.AuthorID, label)
|
||||
snap.messages[id] = msg
|
||||
}
|
||||
if msg.Record.GuildID != DirectMessageGuildID || msg.Record.AuthorID == "" {
|
||||
continue
|
||||
}
|
||||
if authorChannels[msg.Record.AuthorID] == nil {
|
||||
authorChannels[msg.Record.AuthorID] = map[string]struct{}{}
|
||||
}
|
||||
authorChannels[msg.Record.AuthorID][msg.Record.ChannelID] = struct{}{}
|
||||
if channelAuthors[msg.Record.ChannelID] == nil {
|
||||
channelAuthors[msg.Record.ChannelID] = map[string]int{}
|
||||
}
|
||||
channelAuthors[msg.Record.ChannelID][msg.Record.AuthorID]++
|
||||
}
|
||||
|
||||
selfID := mostRepeatedDirectMessageAuthor(authorChannels)
|
||||
for id, channel := range snap.channels {
|
||||
if channel.GuildID != DirectMessageGuildID || !isFallbackChannelName(channel.Name, id) {
|
||||
continue
|
||||
}
|
||||
name := directMessageChannelName(channelAuthors[id], snap.userLabels, selfID)
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
channel.Name = name
|
||||
channel.RawJSON = withRawChannelName(channel.RawJSON, id, channel.GuildID, name, channel.Kind)
|
||||
snap.channels[id] = channel
|
||||
}
|
||||
}
|
||||
|
||||
func shouldUseUserLabel(current string, label userLabel) bool {
|
||||
if label.Name == "" || current == label.Name {
|
||||
return false
|
||||
}
|
||||
return current == "" || label.Priority >= 2
|
||||
}
|
||||
|
||||
// mostRepeatedDirectMessageAuthor returns the author ID that appears in the
// largest number of distinct channels, provided that number is greater than
// one. It returns "" when no author appears in more than one channel.
//
// When several authors share the maximum, the lexicographically smallest ID
// wins so the result does not depend on map iteration order.
func mostRepeatedDirectMessageAuthor(authorChannels map[string]map[string]struct{}) string {
	selfID := ""
	// Starting at 1 means an author must be seen in at least two channels.
	selfChannels := 1
	for authorID, channels := range authorChannels {
		switch n := len(channels); {
		case n > selfChannels:
			selfID = authorID
			selfChannels = n
		case n == selfChannels && selfID != "" && authorID < selfID:
			// Deterministic tie-break between equally frequent authors.
			selfID = authorID
		}
	}
	return selfID
}
|
||||
|
||||
func directMessageChannelName(authorCounts map[string]int, labels map[string]userLabel, selfID string) string {
|
||||
candidates := []string{}
|
||||
bestID := ""
|
||||
bestCount := -1
|
||||
for authorID, count := range authorCounts {
|
||||
label, ok := labels[authorID]
|
||||
if !ok || label.Name == "" {
|
||||
continue
|
||||
}
|
||||
if authorID == selfID && len(authorCounts) > 1 {
|
||||
continue
|
||||
}
|
||||
if len(authorCounts) > 2 {
|
||||
candidates = append(candidates, label.Name)
|
||||
continue
|
||||
}
|
||||
if count > bestCount || (count == bestCount && label.Priority > labels[bestID].Priority) {
|
||||
bestID = authorID
|
||||
bestCount = count
|
||||
}
|
||||
}
|
||||
if len(candidates) > 0 {
|
||||
sort.Strings(candidates)
|
||||
return strings.Join(candidates, ", ")
|
||||
}
|
||||
if bestID == "" {
|
||||
return ""
|
||||
}
|
||||
return labels[bestID].Name
|
||||
}
|
||||
|
||||
func isFallbackChannelName(name, id string) bool {
|
||||
name = strings.TrimSpace(name)
|
||||
return name == "" || name == "channel-"+shortID(id) || name == "dm-"+shortID(id)
|
||||
}
|
||||
|
||||
// withRawChannelName returns rawJSON with the "id", "guild_id", "name", "kind"
// and "source" fields set, where "source" is always "discord_desktop". Other
// fields of an existing JSON object are preserved.
//
// When rawJSON is empty, is not a JSON object, or is the JSON literal null,
// the result is built from an empty object instead. If re-encoding fails the
// original rawJSON is returned unchanged.
func withRawChannelName(rawJSON, id, guildID, name, kind string) string {
	raw := map[string]any{}
	if rawJSON != "" {
		// Best effort: an undecodable payload simply contributes no fields.
		_ = json.Unmarshal([]byte(rawJSON), &raw)
	}
	if raw == nil {
		// Decoding JSON null sets the map to nil; writing to it would panic.
		raw = map[string]any{}
	}

	raw["id"] = id
	raw["guild_id"] = guildID
	raw["name"] = name
	raw["kind"] = kind
	raw["source"] = "discord_desktop"

	body, err := json.Marshal(raw)
	if err != nil {
		return rawJSON
	}
	return string(body)
}
|
||||
|
||||
func withRawAuthorLabel(rawJSON, authorID string, label userLabel) string {
|
||||
if rawJSON == "" || authorID == "" || label.Name == "" {
|
||||
return rawJSON
|
||||
}
|
||||
raw := map[string]any{}
|
||||
if err := json.Unmarshal([]byte(rawJSON), &raw); err != nil {
|
||||
return rawJSON
|
||||
}
|
||||
author, _ := raw["author"].(map[string]any)
|
||||
if author == nil {
|
||||
author = map[string]any{}
|
||||
}
|
||||
author["id"] = authorID
|
||||
if label.Priority >= 2 {
|
||||
author["global_name"] = label.Name
|
||||
} else {
|
||||
author["username"] = label.Name
|
||||
}
|
||||
raw["author"] = author
|
||||
body, err := json.Marshal(raw)
|
||||
if err != nil {
|
||||
return rawJSON
|
||||
}
|
||||
return string(body)
|
||||
}
|
||||
|
||||
func sanitizedRawAuthor(raw map[string]any, authorID string) map[string]any {
|
||||
author, _ := raw["author"].(map[string]any)
|
||||
out := map[string]any{}
|
||||
if authorID != "" {
|
||||
out["id"] = authorID
|
||||
}
|
||||
for _, key := range []string{"username", "global_name", "display_name"} {
|
||||
if value := stringField(author, key); value != "" {
|
||||
out[key] = value
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,198 +0,0 @@
|
||||
package discorddesktop
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestFileFingerprintStatusHelpers(t *testing.T) {
|
||||
base := fileFingerprint{Size: 123, ModUnixNS: 456}
|
||||
require.True(t, sameFileFingerprint(base, fileFingerprint{Size: 123, ModUnixNS: 456, Status: fileStatusSkipped}))
|
||||
require.False(t, sameFileFingerprint(base, fileFingerprint{Size: 124, ModUnixNS: 456}))
|
||||
require.False(t, sameFileFingerprint(base, fileFingerprint{Size: 123, ModUnixNS: 457}))
|
||||
|
||||
require.True(t, isImportedFingerprint(base))
|
||||
require.True(t, isImportedFingerprint(importedFingerprint(base)))
|
||||
require.False(t, isImportedFingerprint(skippedFingerprint(base)))
|
||||
require.Equal(t, fileStatusImported, importedFingerprint(base).Status)
|
||||
require.Equal(t, fileStatusSkipped, skippedFingerprint(base).Status)
|
||||
require.Equal(t, wiretapFileIndexScope, fileIndexScope(Options{}))
|
||||
require.Equal(t, wiretapFileIndexScope, fileIndexScope(Options{FullCache: true}))
|
||||
}
|
||||
|
||||
func TestSnapshotCopyHelpers(t *testing.T) {
|
||||
base := newSnapshot()
|
||||
base.routes["111111111111111121"] = "999999999999999996"
|
||||
base.userLabels["222222222222222232"] = userLabel{Name: "Alice"}
|
||||
base.channels["111111111111111121"] = store.ChannelRecord{ID: "111111111111111121", GuildID: "999999999999999996", Name: "general"}
|
||||
|
||||
snap := newSnapshotWithContext(base)
|
||||
require.Equal(t, base.routes, snap.routes)
|
||||
require.Equal(t, base.userLabels, snap.userLabels)
|
||||
require.Empty(t, snap.channels)
|
||||
|
||||
next := newSnapshot()
|
||||
next.routes["111111111111111122"] = "999999999999999996"
|
||||
next.userLabels["222222222222222233"] = userLabel{Name: "Bob"}
|
||||
next.channels["111111111111111122"] = store.ChannelRecord{ID: "111111111111111122", GuildID: "999999999999999996", Name: "random"}
|
||||
mergeSnapshotContext(base, next)
|
||||
|
||||
require.Equal(t, "999999999999999996", base.routes["111111111111111122"])
|
||||
require.Equal(t, "Bob", base.userLabels["222222222222222233"].Name)
|
||||
require.Equal(t, "random", base.channels["111111111111111122"].Name)
|
||||
|
||||
lookup := copyChannelLookup(base.channels)
|
||||
lookup["111111111111111122"] = store.ChannelRecord{ID: "changed"}
|
||||
require.Equal(t, "random", base.channels["111111111111111122"].Name)
|
||||
}
|
||||
|
||||
func TestSnapshotWithoutMessageEvents(t *testing.T) {
|
||||
snap := newSnapshot()
|
||||
snap.messages["333333333333333346"] = store.MessageMutation{
|
||||
Record: store.MessageRecord{ID: "333333333333333346"},
|
||||
Options: store.WriteOptions{
|
||||
AppendEvent: true,
|
||||
EnqueueEmbedding: true,
|
||||
},
|
||||
}
|
||||
stripped := snapshotWithoutMessageEvents(snap)
|
||||
require.False(t, stripped.messages["333333333333333346"].Options.AppendEvent)
|
||||
require.True(t, stripped.messages["333333333333333346"].Options.EnqueueEmbedding)
|
||||
require.True(t, snap.messages["333333333333333346"].Options.AppendEvent)
|
||||
}
|
||||
|
||||
func TestRouteFilteredCacheHelpers(t *testing.T) {
|
||||
require.Equal(t, fileSourceCacheData, sourceForPath("/tmp/discord", "/tmp/discord/Cache/Cache_Data/entry", "Cache/Cache_Data/entry"))
|
||||
require.Equal(t, fileSourceCacheData, sourceForPath("/tmp/discord", "/tmp/discord/Service Worker/CacheStorage/cache/entry", "Service Worker/CacheStorage/cache/entry"))
|
||||
require.Equal(t, fileSourceContext, sourceForPath("/tmp/discord", "/tmp/discord/Local Storage/leveldb/000001.log", "Local Storage/leveldb/000001.log"))
|
||||
}
|
||||
|
||||
func TestCacheFileHasRouteHint(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, "route"), []byte("https://discord.com/api/v9/channels/111111111111111121/messages?limit=50"), 0o600))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, "plain"), []byte("no discord route here"), 0o600))
|
||||
|
||||
root, err := os.OpenRoot(dir)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = root.Close() }()
|
||||
|
||||
ok, err := cacheFileHasRouteHint(root, "route")
|
||||
require.NoError(t, err)
|
||||
require.True(t, ok)
|
||||
ok, err = cacheFileHasRouteHint(root, "plain")
|
||||
require.NoError(t, err)
|
||||
require.False(t, ok)
|
||||
_, err = cacheFileHasRouteHint(root, "missing")
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestImportAndStateEdgeBranches(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
_, err := Import(ctx, nil, Options{})
|
||||
require.ErrorContains(t, err, "store is required")
|
||||
|
||||
configHome := t.TempDir()
|
||||
t.Setenv("XDG_CONFIG_HOME", configHome)
|
||||
if runtime.GOOS == "linux" {
|
||||
require.Equal(t, filepath.Join(configHome, "discord"), DefaultPath())
|
||||
}
|
||||
|
||||
dir := t.TempDir()
|
||||
s, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
stats, err := Import(ctx, s, Options{
|
||||
Path: dir,
|
||||
Now: func() time.Time { return time.Date(2026, 5, 8, 12, 0, 0, 0, time.UTC) },
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, dir, stats.Path)
|
||||
require.Equal(t, 1, stats.Checkpoints)
|
||||
|
||||
stats, err = Import(ctx, nil, Options{Path: filepath.Join(dir, "missing"), DryRun: true})
|
||||
require.NoError(t, err)
|
||||
require.True(t, stats.DryRun)
|
||||
|
||||
stats, err = Import(ctx, nil, Options{Path: dir, DryRun: true, FullCache: true})
|
||||
require.NoError(t, err)
|
||||
require.True(t, stats.FullCache)
|
||||
|
||||
require.NoError(t, s.SetSyncState(ctx, fileIndexScope(Options{}), "{not-json"))
|
||||
require.NoError(t, s.UpsertChannel(ctx, store.ChannelRecord{ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`}))
|
||||
state, err := loadScanState(ctx, s, Options{})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, state.previous)
|
||||
require.Equal(t, "general", state.channels["c1"].Name)
|
||||
}
|
||||
|
||||
func TestSnapshotFinalizeAndCommitBranches(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
s, err := store.Open(ctx, filepath.Join(t.TempDir(), "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = s.Close() }()
|
||||
|
||||
snap := newSnapshot()
|
||||
snap.messages["m-missing"] = store.MessageMutation{
|
||||
Record: store.MessageRecord{ID: "m-missing", ChannelID: "c-missing", RawJSON: `{}`},
|
||||
}
|
||||
snap.messages["m-known"] = store.MessageMutation{
|
||||
Record: store.MessageRecord{ID: "m-known", GuildID: "g1", ChannelID: "c1", ChannelName: "general", RawJSON: `{}`},
|
||||
}
|
||||
stats := &Stats{}
|
||||
totals := newScanTotals()
|
||||
unresolved := finalizeSnapshot(snap, map[string]store.ChannelRecord{
|
||||
"c1": {ID: "c1", GuildID: "g1", Kind: "text", Name: "general", RawJSON: `{}`},
|
||||
}, totals, stats, true)
|
||||
require.Equal(t, unresolvedMessages{"m-missing": "c-missing"}, unresolved)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
require.Equal(t, 1, stats.SkippedMessages)
|
||||
require.Equal(t, "general", snap.channels["c1"].Name)
|
||||
require.Equal(t, "g1", snap.guilds["g1"].ID)
|
||||
|
||||
more := unresolvedMessages{"m2": "c2"}
|
||||
mergeUnresolved(unresolved, more)
|
||||
recordUnresolved(unresolved, totals, stats)
|
||||
require.Equal(t, 2, stats.SkippedMessages)
|
||||
|
||||
state := scanState{current: map[string]fileFingerprint{}}
|
||||
candidates := []fileCandidate{{relKey: "Cache_Data/entry", fingerprint: fileFingerprint{Size: 10, ModUnixNS: 20}}}
|
||||
require.NoError(t, commitSnapshot(ctx, s, Options{DryRun: true}, state, candidates, newSnapshot(), true, stats))
|
||||
require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), false, stats))
|
||||
require.NoError(t, commitSnapshot(ctx, s, Options{}, state, candidates, newSnapshot(), true, stats))
|
||||
require.True(t, isImportedFingerprint(state.current["Cache_Data/entry"]))
|
||||
|
||||
require.NoError(t, checkpointScannedCandidates(ctx, s, Options{DryRun: true}, state, candidates, stats))
|
||||
require.NoError(t, checkpointScannedCandidates(ctx, s, Options{}, state, candidates, stats))
|
||||
}
|
||||
|
||||
func TestRouteHintCollectionBranches(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, "route"), []byte("https://discord.com/channels/123456789012/111111111111111121"), 0o600))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(dir, "plain"), []byte("plain"), 0o600))
|
||||
|
||||
root, err := os.OpenRoot(dir)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = root.Close() }()
|
||||
|
||||
snap := newSnapshot()
|
||||
err = collectCacheRouteHints(context.Background(), root, []fileCandidate{
|
||||
{relPath: "missing"},
|
||||
{relPath: "plain"},
|
||||
{relPath: "route"},
|
||||
}, snap)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "123456789012", snap.routes["111111111111111121"])
|
||||
|
||||
canceled, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
require.ErrorIs(t, collectCacheRouteHints(canceled, root, []fileCandidate{{relPath: "route"}}, newSnapshot()), context.Canceled)
|
||||
}
|
||||
@ -1,387 +0,0 @@
|
||||
package discorddesktop
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestImportFastCacheSkipsUnroutedCacheDataUnlessFullCache(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`
|
||||
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"slow-cache"}
|
||||
{"id":"333333333333333346","channel_id":"111111111111111121","content":"unrouted historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`), 0o600))
|
||||
|
||||
fastStore, err := store.Open(ctx, filepath.Join(dir, "fast.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = fastStore.Close() }()
|
||||
|
||||
stats, err := Import(ctx, fastStore, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.CacheFilesFastSkipped)
|
||||
require.Equal(t, 0, stats.Messages)
|
||||
|
||||
results, err := fastStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
|
||||
stats, err = Import(ctx, fastStore, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 0, stats.CacheFilesFastSkipped)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
|
||||
stats, err = Import(ctx, fastStore, Options{Path: dir, FullCache: true})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
|
||||
fullStore, err := store.Open(ctx, filepath.Join(dir, "full.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = fullStore.Close() }()
|
||||
|
||||
stats, err = Import(ctx, fullStore, Options{Path: dir, FullCache: true})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 0, stats.CacheFilesFastSkipped)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
|
||||
results, err = fullStore.SearchMessages(ctx, store.SearchOptions{Query: "unrouted historical", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, "slow-cache", results[0].ChannelName)
|
||||
}
|
||||
|
||||
func TestImportCheckpointsCacheBatches(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
for i := range checkpointEveryFiles + 1 {
|
||||
channelID := "111111111111111121"
|
||||
messageID := 333333333333333346 + i
|
||||
body := bytesf(`https://discord.com/channels/999999999999999996/%s
|
||||
{"id":"%d","channel_id":"%s","content":"checkpoint cache %d","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, channelID, messageID, channelID, i)
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), body, 0o600))
|
||||
}
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, checkpointEveryFiles+1, stats.FilesScanned)
|
||||
require.Equal(t, checkpointEveryFiles+1, stats.Messages)
|
||||
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
||||
}
|
||||
|
||||
func TestImportUsesLaterCacheMetadataBeforeCheckpointingEarlierBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
channelID := "111111111111111121"
|
||||
guildID := "999999999999999996"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"needs later channel metadata","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, channelID, channelID), 0o600))
|
||||
for i := 1; i < checkpointEveryFiles; i++ {
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), bytesf(
|
||||
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
|
||||
channelID,
|
||||
), 0o600))
|
||||
}
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"%s","guild_id":"%s","type":0,"name":"later-metadata"}
|
||||
`, channelID, channelID, guildID), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "needs later channel metadata", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, guildID, results[0].GuildID)
|
||||
require.Equal(t, "later-metadata", results[0].ChannelName)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
}
|
||||
|
||||
func TestImportCheckpointsPartiallyResolvedRetryBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
resolvedChannelID := "111111111111111121"
|
||||
unresolvedChannelID := "111111111111111122"
|
||||
guildID := "999999999999999996"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v10/channels/%s/messages?limit=50
|
||||
https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"partially resolved retry message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
{"id":"333333333333333347","channel_id":"%s","content":"still unresolved retry message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID), 0o600))
|
||||
for i := 1; i < checkpointEveryFiles; i++ {
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", i)), bytesf(
|
||||
"https://discord.com/api/v9/channels/%s/messages?limit=50\n",
|
||||
resolvedChannelID,
|
||||
), 0o600))
|
||||
}
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, fmt.Sprintf("entry_%03d", checkpointEveryFiles)), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"%s","guild_id":"%s","type":0,"name":"partially-resolved"}
|
||||
`, resolvedChannelID, resolvedChannelID, guildID), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, checkpointEveryFiles+1+checkpointEveryFiles, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
require.Equal(t, 1, stats.SkippedMessages)
|
||||
require.GreaterOrEqual(t, stats.Checkpoints, 2)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "partially resolved retry", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, "partially-resolved", results[0].ChannelName)
|
||||
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "still unresolved retry", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, checkpointEveryFiles+1, stats.FilesUnchanged)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
}
|
||||
|
||||
func TestImportCheckpointsUnresolvableRouteBearingCacheMisses(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
channelID := "111111111111111121"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"permanent unresolved cache miss","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, channelID, channelID), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.SkippedMessages)
|
||||
require.Equal(t, 1, stats.Checkpoints)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "permanent unresolved", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
}
|
||||
|
||||
func TestImportDoesNotAppendEventsForSkippedMixedBatch(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
guildID := "999999999999999996"
|
||||
resolvedChannelID := "111111111111111121"
|
||||
unresolvedChannelID := "111111111111111122"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/channels/%s/%s
|
||||
https://discord.com/api/v9/channels/%s/messages?limit=50
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"mixed resolved message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
{"id":"333333333333333347","channel_id":"%s","content":"mixed unresolved message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, guildID, resolvedChannelID, unresolvedChannelID, resolvedChannelID, unresolvedChannelID), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Checkpoints)
|
||||
requireMessageCount(t, ctx, st, "message_events", 0)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "mixed resolved", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
results, err = st.SearchMessages(ctx, store.SearchOptions{Query: "mixed unresolved", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
requireMessageCount(t, ctx, st, "message_events", 0)
|
||||
}
|
||||
|
||||
func TestImportDoesNotDuplicateEventsWhenSwitchingFullCacheModes(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
channelID := "111111111111111121"
|
||||
guildID := "999999999999999996"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_000"), bytesf(`https://discord.com/channels/%s/%s
|
||||
{"id":"%s","guild_id":"%s","type":0,"name":"mode-switch"}
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"mode switch event once","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, guildID, channelID, channelID, guildID, channelID), 0o600))
|
||||
|
||||
t.Run("full then default", func(t *testing.T) {
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "full-first.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir, FullCache: true})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
})
|
||||
|
||||
t.Run("default then full", func(t *testing.T) {
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "default-first.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir, FullCache: true})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
requireMessageCount(t, ctx, st, "message_events", 1)
|
||||
})
|
||||
}
|
||||
|
||||
func TestImportFastCachePreservesKnownChannelMetadataAcrossBatches(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
leveldbPath := filepath.Join(dir, "Local Storage", "leveldb")
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(leveldbPath, 0o755))
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
|
||||
channelID := "111111111111111121"
|
||||
guildID := "999999999999999996"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(leveldbPath, "000001.log"), bytesf(
|
||||
`{"id":"%s","guild_id":"%s","type":11,"name":"known-thread","thread_metadata":{"archived":false}}`,
|
||||
channelID,
|
||||
guildID,
|
||||
), 0o600))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), bytesf(`https://discord.com/channels/%s/%s
|
||||
{"id":"333333333333333346","channel_id":"%s","content":"thread metadata cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`, guildID, channelID, channelID), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
|
||||
channels, err := st.Channels(ctx, guildID)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, channels, 1)
|
||||
require.Equal(t, "known-thread", channels[0].Name)
|
||||
require.Equal(t, "thread_public", channels[0].Kind)
|
||||
|
||||
_, rows, err := st.ReadOnlyQuery(ctx, "select raw_json from channels where id = '111111111111111121'")
|
||||
require.NoError(t, err)
|
||||
require.Len(t, rows, 1)
|
||||
require.Contains(t, rows[0][0], `"type":11`)
|
||||
}
|
||||
|
||||
func TestImportFastCacheRouteFiltersServiceWorkerCacheStorage(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Service Worker", "CacheStorage", "cache-id")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "unrouted"), []byte(`
|
||||
{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"service-worker-cache"}
|
||||
{"id":"333333333333333346","channel_id":"111111111111111121","content":"service worker historical cache","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}
|
||||
`), 0o600))
|
||||
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.CacheFilesFastSkipped)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "service worker historical", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
}
|
||||
|
||||
func requireMessageCount(t *testing.T, ctx context.Context, st *store.Store, table string, expected int) {
|
||||
t.Helper()
|
||||
_, rows, err := st.ReadOnlyQuery(ctx, "select count(*) from "+table)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, rows, 1)
|
||||
require.Len(t, rows[0], 1)
|
||||
require.Equal(t, strconv.Itoa(expected), rows[0][0])
|
||||
}
|
||||
|
||||
// bytesf formats args according to format, like fmt.Sprintf, and returns the
// result as a byte slice.
func bytesf(format string, args ...any) []byte {
	var out []byte
	out = fmt.Appendf(out, format, args...)
	return out
}
|
||||
@ -1,110 +0,0 @@
|
||||
package discorddesktop
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
type importRun struct {
|
||||
ctx context.Context
|
||||
st *store.Store
|
||||
opts Options
|
||||
state scanState
|
||||
rootFS *os.Root
|
||||
channelLookup map[string]store.ChannelRecord
|
||||
totals scanTotals
|
||||
stats *Stats
|
||||
base snapshot
|
||||
pending []fileCandidate
|
||||
pendingUnresolved unresolvedMessages
|
||||
pendingLookupSize int
|
||||
pendingRouteSize int
|
||||
}
|
||||
|
||||
func newImportRun(ctx context.Context, st *store.Store, opts Options, state scanState, rootFS *os.Root, stats *Stats) *importRun {
|
||||
return &importRun{
|
||||
ctx: ctx,
|
||||
st: st,
|
||||
opts: opts,
|
||||
state: state,
|
||||
rootFS: rootFS,
|
||||
channelLookup: copyChannelLookup(state.channels),
|
||||
totals: newScanTotals(),
|
||||
stats: stats,
|
||||
base: newSnapshot(),
|
||||
pendingUnresolved: unresolvedMessages{},
|
||||
pendingLookupSize: -1,
|
||||
pendingRouteSize: -1,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *importRun) scanContext(candidates []fileCandidate) error {
|
||||
if err := scanCandidates(r.ctx, r.rootFS, r.opts, candidates, r.base, r.channelLookup, r.stats); err != nil {
|
||||
return err
|
||||
}
|
||||
return r.finalizeAndCommit(candidates, r.base, false)
|
||||
}
|
||||
|
||||
func (r *importRun) scanCacheBatches(candidates []fileCandidate) error {
|
||||
for start := 0; start < len(candidates); start += checkpointEveryFiles {
|
||||
end := min(start+checkpointEveryFiles, len(candidates))
|
||||
batchCandidates := candidates[start:end]
|
||||
batch := newSnapshotWithContext(r.base)
|
||||
if err := scanCandidates(r.ctx, r.rootFS, r.opts, batchCandidates, batch, r.channelLookup, r.stats); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.finalizeAndCommit(batchCandidates, batch, false); err != nil {
|
||||
return err
|
||||
}
|
||||
mergeSnapshotContext(r.base, batch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *importRun) finalizeAndCommit(candidates []fileCandidate, snap snapshot, recordSkipped bool) error {
|
||||
unresolved := finalizeSnapshot(snap, r.channelLookup, r.totals, r.stats, recordSkipped)
|
||||
checkpoint := len(unresolved) == 0
|
||||
if !checkpoint {
|
||||
r.deferCandidates(candidates, unresolved)
|
||||
}
|
||||
if len(candidates) == 0 && !snapshotHasChanges(snap) {
|
||||
return nil
|
||||
}
|
||||
return commitSnapshot(r.ctx, r.st, r.opts, r.state, candidates, snap, checkpoint, r.stats)
|
||||
}
|
||||
|
||||
func (r *importRun) deferCandidates(candidates []fileCandidate, unresolved unresolvedMessages) {
|
||||
r.pending = append(r.pending, candidates...)
|
||||
mergeUnresolved(r.pendingUnresolved, unresolved)
|
||||
if r.pendingLookupSize >= 0 {
|
||||
return
|
||||
}
|
||||
r.pendingLookupSize = len(r.channelLookup)
|
||||
r.pendingRouteSize = len(r.base.routes)
|
||||
}
|
||||
|
||||
func (r *importRun) retryPending() error {
|
||||
if len(r.pending) == 0 {
|
||||
return nil
|
||||
}
|
||||
if !r.pendingCanResolve() {
|
||||
recordUnresolved(r.pendingUnresolved, r.totals, r.stats)
|
||||
return checkpointScannedCandidates(r.ctx, r.st, r.opts, r.state, r.pending, r.stats)
|
||||
}
|
||||
retry := newSnapshotWithContext(r.base)
|
||||
if err := scanCandidates(r.ctx, r.rootFS, r.opts, r.pending, retry, r.channelLookup, r.stats); err != nil {
|
||||
return err
|
||||
}
|
||||
finalizeSnapshot(retry, r.channelLookup, r.totals, r.stats, true)
|
||||
if err := commitSnapshot(r.ctx, r.st, r.opts, r.state, r.pending, retry, true, r.stats); err != nil {
|
||||
return err
|
||||
}
|
||||
mergeSnapshotContext(r.base, retry)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *importRun) pendingCanResolve() bool {
|
||||
return len(r.channelLookup) > r.pendingLookupSize || len(r.base.routes) > r.pendingRouteSize
|
||||
}
|
||||
@ -4,71 +4,16 @@ import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
"github.com/steipete/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestDesktopPathAndImportHelpers(t *testing.T) {
|
||||
home := t.TempDir()
|
||||
switch runtime.GOOS {
|
||||
case "windows":
|
||||
t.Setenv("USERPROFILE", home)
|
||||
require.Equal(t, filepath.Join(home, "AppData", "Roaming", "discord"), DefaultPath())
|
||||
case "darwin":
|
||||
t.Setenv("HOME", home)
|
||||
require.Equal(t, filepath.Join(home, "Library", "Application Support", "discord"), DefaultPath())
|
||||
default:
|
||||
xdg := filepath.Join(home, "xdg")
|
||||
t.Setenv("XDG_CONFIG_HOME", xdg)
|
||||
require.Equal(t, filepath.Join(xdg, "discord"), DefaultPath())
|
||||
}
|
||||
|
||||
require.Equal(t, "dm", kindForChannelType(1, true))
|
||||
require.Equal(t, "group_dm", kindForChannelType(3, true))
|
||||
require.Equal(t, "text", kindForChannelType(0, false))
|
||||
require.Equal(t, "announcement", kindForChannelType(5, false))
|
||||
require.Equal(t, "thread_announcement", kindForChannelType(10, false))
|
||||
require.Equal(t, "thread_public", kindForChannelType(11, false))
|
||||
require.Equal(t, "thread_private", kindForChannelType(12, false))
|
||||
require.Equal(t, "forum", kindForChannelType(15, false))
|
||||
require.Equal(t, "desktop", kindForChannelType(99, false))
|
||||
embedParts := embedText(map[string]any{"embeds": []any{
|
||||
map[string]any{"title": " title ", "description": "body"},
|
||||
}})
|
||||
require.Equal(t, []string{"title", "body"}, embedParts)
|
||||
require.Equal(t, time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC), snowflakeTime("0"))
|
||||
require.True(t, snowflakeTime("not-a-snowflake").IsZero())
|
||||
require.Equal(t, time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC), parseDiscordTime("2026-04-24T12:00:00Z"))
|
||||
require.True(t, parseDiscordTime("bad").IsZero())
|
||||
require.Empty(t, formatOptionalTime(time.Time{}))
|
||||
require.NotEmpty(t, formatOptionalTime(time.Date(2026, 4, 24, 12, 0, 0, 0, time.UTC)))
|
||||
|
||||
i, ok := intField(map[string]any{"value": float64(3)}, "value")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 3, i)
|
||||
i, ok = intField(map[string]any{"value": json.Number("4")}, "value")
|
||||
require.True(t, ok)
|
||||
require.Equal(t, 4, i)
|
||||
_, ok = intField(map[string]any{"value": json.Number("nope")}, "value")
|
||||
require.False(t, ok)
|
||||
_, ok = intField(map[string]any{}, "value")
|
||||
require.False(t, ok)
|
||||
|
||||
require.Equal(t, int64(3), int64Field(map[string]any{"value": float64(3)}, "value"))
|
||||
require.Equal(t, int64(4), int64Field(map[string]any{"value": int64(4)}, "value"))
|
||||
require.Equal(t, int64(5), int64Field(map[string]any{"value": 5}, "value"))
|
||||
require.Equal(t, int64(6), int64Field(map[string]any{"value": json.Number("6")}, "value"))
|
||||
require.Zero(t, int64Field(map[string]any{"value": "6"}, "value"))
|
||||
}
|
||||
|
||||
func TestImportExtractsDirectMessageFromDesktopCache(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -115,48 +60,6 @@ binary-ish {"t":"MESSAGE_CREATE","token":"do-not-store","d":{"id":"3333333333333
|
||||
require.NotContains(t, rows[0][0], "do-not-store")
|
||||
}
|
||||
|
||||
func TestImportSkipsUnchangedDesktopCacheFiles(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Local Storage", "leveldb")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
channelPath := filepath.Join(cachePath, "000001.log")
|
||||
messagePath := filepath.Join(cachePath, "000002.log")
|
||||
require.NoError(t, os.WriteFile(channelPath, []byte(`{"id":"111111111111111121","guild_id":"999999999999999996","type":0,"name":"wiretap-fast"}`), 0o600))
|
||||
require.NoError(t, os.WriteFile(messagePath, []byte(`{"id":"333333333333333346","channel_id":"111111111111111121","content":"first incremental message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222232","username":"alice"}}`), 0o600))
|
||||
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 2, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.FilesScanned)
|
||||
require.Equal(t, 2, stats.FilesUnchanged)
|
||||
require.Equal(t, 0, stats.Messages)
|
||||
|
||||
require.NoError(t, os.WriteFile(messagePath, []byte(`{"id":"333333333333333347","channel_id":"111111111111111121","content":"second incremental message","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222233","username":"bob"}}`), 0o600))
|
||||
require.NoError(t, os.Chtimes(messagePath, time.Date(2026, 4, 23, 18, 21, 0, 0, time.UTC), time.Date(2026, 4, 23, 18, 21, 0, 0, time.UTC)))
|
||||
|
||||
stats, err = Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, stats.FilesScanned)
|
||||
require.Equal(t, 1, stats.FilesUnchanged)
|
||||
require.Equal(t, 1, stats.Messages)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "second incremental", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, "999999999999999996", results[0].GuildID)
|
||||
require.Equal(t, "wiretap-fast", results[0].ChannelName)
|
||||
}
|
||||
|
||||
func TestImportDryRunDoesNotWrite(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -178,22 +81,6 @@ func TestImportDryRunDoesNotWrite(t *testing.T) {
|
||||
require.Empty(t, results)
|
||||
}
|
||||
|
||||
func TestImportMissingDesktopPathIsEmpty(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "missing")
|
||||
st, err := store.Open(ctx, filepath.Join(dir, "discrawl.db"))
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: path})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, path, stats.Path)
|
||||
require.Zero(t, stats.FilesScanned)
|
||||
require.Zero(t, stats.Messages)
|
||||
require.False(t, stats.FinishedAt.IsZero())
|
||||
}
|
||||
|
||||
func TestImportExtractsCompressedUnknownMessageArrayFromChromiumCache(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -228,54 +115,6 @@ func TestImportExtractsCompressedUnknownMessageArrayFromChromiumCache(t *testing
|
||||
require.Empty(t, results)
|
||||
}
|
||||
|
||||
func TestImportClassifiesCachedAPIMessageArrayFromSelectedDMRoute(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
storagePath := filepath.Join(dir, "Local Storage", "leveldb")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.MkdirAll(storagePath, 0o755))
|
||||
|
||||
require.NoError(t, os.WriteFile(filepath.Join(storagePath, "000001.log"), []byte(`noise
|
||||
{"_state":{"selectedGuildId":null,"selectedChannelId":"1459084628458471569","selectedChannelIds":{"null":"1459084628458471569"}}}
|
||||
`), 0o600))
|
||||
|
||||
messages := `[
|
||||
{"id":"1499513741308461240","channel_id":"1459084628458471569","content":"changed your mind later","timestamp":"2026-04-30T20:52:15.546Z","author":{"id":"1395396685148061737","username":"onur_tc","global_name":"onur"}},
|
||||
{"id":"1499513741308461241","channel_id":"1459084628458471569","content":"please correct me","timestamp":"2026-04-30T20:52:16.546Z","author":{"id":"1395396685148061737","username":"onur_tc","global_name":"onur"}},
|
||||
{"id":"1499562787343278080","channel_id":"1459084628458471569","content":"I know you are going through a rough time","timestamp":"2026-05-01T00:08:34.929Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}}
|
||||
]`
|
||||
var compressed bytes.Buffer
|
||||
zw := gzip.NewWriter(&compressed)
|
||||
_, err := zw.Write([]byte(messages))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, zw.Close())
|
||||
|
||||
cacheBlob := append([]byte("https://discord.com/api/v9/channels/1459084628458471569/messages?limit=14\x00"), compressed.Bytes()...)
|
||||
cacheBlob = append(cacheBlob, []byte("chromium trailing metadata")...)
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), cacheBlob, 0o600))
|
||||
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 2, stats.FilesScanned)
|
||||
require.Equal(t, 3, stats.Messages)
|
||||
require.Equal(t, 3, stats.DMMessages)
|
||||
require.Equal(t, 1, stats.DMChannels)
|
||||
require.Equal(t, 0, stats.SkippedMessages)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "changed your mind", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, results, 1)
|
||||
require.Equal(t, DirectMessageGuildID, results[0].GuildID)
|
||||
require.Equal(t, "onur", results[0].ChannelName)
|
||||
require.Equal(t, "onur", results[0].AuthorName)
|
||||
}
|
||||
|
||||
func TestImportReconcilesMessagesWithLaterGuildChannelMetadata(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
@ -344,113 +183,6 @@ func TestImportClassifiesMessagesFromCachedChannelRoutes(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Len(t, guildResults, 1)
|
||||
require.Equal(t, "999999999999999998", guildResults[0].GuildID)
|
||||
|
||||
guildChannels, err := st.Channels(ctx, "999999999999999998")
|
||||
require.NoError(t, err)
|
||||
require.Len(t, guildChannels, 1)
|
||||
require.Equal(t, "111111111111111115", guildChannels[0].ID)
|
||||
require.Equal(t, "channel-111115", guildChannels[0].Name)
|
||||
|
||||
_, guildRows, err := st.ReadOnlyQuery(ctx, "select name from guilds where id = '999999999999999998'")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, [][]string{{"Discord Desktop Guild 999999999999999998"}}, guildRows)
|
||||
}
|
||||
|
||||
func TestImportClassifiesGzipCacheMessagesFromRendererRoutes(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
logPath := filepath.Join(dir, "logs")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.MkdirAll(logPath, 0o755))
|
||||
|
||||
require.NoError(t, os.WriteFile(filepath.Join(logPath, "renderer_js.log"), []byte(`[Routing/Utils] transitionTo - Transitioning to /channels/@me/111111111111111122
|
||||
[Routing/Utils] transitionTo - Transitioning to /channels/999999999999999995/111111111111111123
|
||||
`), 0o600))
|
||||
|
||||
var compressed bytes.Buffer
|
||||
zw := gzip.NewWriter(&compressed)
|
||||
_, err := zw.Write([]byte(`[
|
||||
{"id":"333333333333333348","channel_id":"111111111111111122","content":"current cache dm","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222234","username":"alice","global_name":"Alice"}},
|
||||
{"id":"333333333333333349","channel_id":"111111111111111123","content":"current cache guild","timestamp":"2026-04-23T18:20:44Z","author":{"id":"222222222222222235","username":"bob","global_name":"Bob"}}
|
||||
]`))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, zw.Close())
|
||||
|
||||
cacheBlob := append([]byte("1/0/https://discord.com/api/v9/channels/111111111111111122/messages?limit=13\x00content-encoding\x00gzip\x00"), compressed.Bytes()...)
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), cacheBlob, 0o600))
|
||||
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 2, stats.FilesScanned)
|
||||
require.Equal(t, 2, stats.Messages)
|
||||
require.Equal(t, 1, stats.DMMessages)
|
||||
require.Equal(t, 1, stats.GuildMessages)
|
||||
require.Equal(t, 0, stats.SkippedMessages)
|
||||
|
||||
dmResults, err := st.SearchMessages(ctx, store.SearchOptions{Query: "current cache dm", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, dmResults, 1)
|
||||
require.Equal(t, DirectMessageGuildID, dmResults[0].GuildID)
|
||||
require.Equal(t, "Alice", dmResults[0].ChannelName)
|
||||
|
||||
guildResults, err := st.SearchMessages(ctx, store.SearchOptions{Query: "current cache guild", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, guildResults, 1)
|
||||
require.Equal(t, "999999999999999995", guildResults[0].GuildID)
|
||||
require.Equal(t, "channel-111123", guildResults[0].ChannelName)
|
||||
}
|
||||
|
||||
func TestImportInfersDirectMessageNamesFromCachedUsers(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`https://discord.com/channels/@me/111111111111111119
|
||||
[
|
||||
{"id":"333333333333333341","channel_id":"111111111111111119","content":"self first","timestamp":"2026-04-23T18:20:43Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
|
||||
{"id":"333333333333333342","channel_id":"111111111111111119","content":"self second","timestamp":"2026-04-23T18:20:44Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}},
|
||||
{"id":"333333333333333343","channel_id":"111111111111111119","content":"counterparty","timestamp":"2026-04-23T18:20:45Z","author":{"id":"222222222222222230","username":"vincentkoc"}}
|
||||
]
|
||||
{"user":{"id":"222222222222222230","username":"vincentkoc","global_name":"Vincent K"}}
|
||||
https://discord.com/channels/@me/111111111111111120
|
||||
{"id":"333333333333333344","channel_id":"111111111111111120","content":"another dm","timestamp":"2026-04-23T18:20:46Z","author":{"id":"999999999999999991","username":"steipete","global_name":"Peter"}}
|
||||
{"id":"333333333333333345","channel_id":"111111111111111120","content":"alice reply","timestamp":"2026-04-23T18:20:47Z","author":{"id":"222222222222222231","username":"alice","global_name":"Alice"}}
|
||||
`), 0o600))
|
||||
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 5, stats.Messages)
|
||||
require.Equal(t, 2, stats.DMChannels)
|
||||
|
||||
channels, err := st.Channels(ctx, DirectMessageGuildID)
|
||||
require.NoError(t, err)
|
||||
namesByID := map[string]string{}
|
||||
for _, channel := range channels {
|
||||
namesByID[channel.ID] = channel.Name
|
||||
}
|
||||
require.Equal(t, "Vincent K", namesByID["111111111111111119"])
|
||||
require.Equal(t, "Alice", namesByID["111111111111111120"])
|
||||
|
||||
rows, err := st.ListMessages(ctx, store.MessageListOptions{
|
||||
GuildIDs: []string{DirectMessageGuildID},
|
||||
Channel: "Vincent",
|
||||
Last: 1,
|
||||
})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, rows, 1)
|
||||
require.Equal(t, "Vincent K", rows[0].ChannelName)
|
||||
require.Equal(t, "Vincent K", rows[0].AuthorName)
|
||||
}
|
||||
|
||||
func TestImportDropsPreviousUnknownWiretapRows(t *testing.T) {
|
||||
@ -484,28 +216,3 @@ func TestImportDropsPreviousUnknownWiretapRows(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
}
|
||||
|
||||
func TestImportSkipsAmbiguousCachedChannelRoutes(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
dir := t.TempDir()
|
||||
cachePath := filepath.Join(dir, "Cache", "Cache_Data")
|
||||
require.NoError(t, os.MkdirAll(cachePath, 0o755))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(cachePath, "entry_0"), []byte(`https://discord.com/channels/999999999999999998/111111111111111118
|
||||
https://discord.com/channels/999999999999999997/111111111111111118
|
||||
{"id":"333333333333333340","channel_id":"111111111111111118","content":"ambiguous route message","timestamp":"2026-04-23T18:20:43Z","author":{"id":"222222222222222229","username":"alice"}}`), 0o600))
|
||||
|
||||
dbPath := filepath.Join(dir, "discrawl.db")
|
||||
st, err := store.Open(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = st.Close() }()
|
||||
|
||||
stats, err := Import(ctx, st, Options{Path: dir})
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, stats.Messages)
|
||||
require.Equal(t, 1, stats.SkippedMessages)
|
||||
require.Equal(t, 1, stats.SkippedChannels)
|
||||
|
||||
results, err := st.SearchMessages(ctx, store.SearchOptions{Query: "ambiguous", Limit: 10})
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, results)
|
||||
}
|
||||
|
||||
@ -1,165 +0,0 @@
|
||||
package discorddesktop
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/openclaw/discrawl/internal/store"
|
||||
)
|
||||
|
||||
func TestPrimitiveValueHelpers(t *testing.T) {
|
||||
raw := map[string]any{
|
||||
"string": "value",
|
||||
"blank": " ",
|
||||
"int": 3,
|
||||
"int64": int64(4),
|
||||
"float": float64(5),
|
||||
"json_number": json.Number("6"),
|
||||
"numeric": "7",
|
||||
"bad_numeric": "nope",
|
||||
"truthy": true,
|
||||
"array": []any{"one", "two"},
|
||||
}
|
||||
|
||||
require.Equal(t, "value", stringField(raw, "string"))
|
||||
require.Empty(t, stringField(raw, "blank"))
|
||||
require.Equal(t, "6", stringField(raw, "json_number"))
|
||||
require.Empty(t, stringField(raw, "int"))
|
||||
require.Empty(t, stringField(raw, "missing"))
|
||||
|
||||
for key, want := range map[string]int{
|
||||
"int": 3,
|
||||
"float": 5,
|
||||
"json_number": 6,
|
||||
} {
|
||||
got, ok := intField(raw, key)
|
||||
require.True(t, ok, key)
|
||||
require.Equal(t, want, got, key)
|
||||
}
|
||||
_, ok := intField(raw, "bad_numeric")
|
||||
require.False(t, ok)
|
||||
_, ok = intField(raw, "int64")
|
||||
require.False(t, ok)
|
||||
_, ok = intField(raw, "numeric")
|
||||
require.False(t, ok)
|
||||
_, ok = intField(raw, "missing")
|
||||
require.False(t, ok)
|
||||
|
||||
require.Equal(t, int64(3), int64Field(raw, "int"))
|
||||
require.Equal(t, int64(4), int64Field(raw, "int64"))
|
||||
require.Equal(t, int64(5), int64Field(raw, "float"))
|
||||
require.Equal(t, int64(6), int64Field(raw, "json_number"))
|
||||
require.Zero(t, int64Field(raw, "numeric"))
|
||||
require.Zero(t, int64Field(raw, "bad_numeric"))
|
||||
|
||||
require.True(t, boolField(raw, "truthy"))
|
||||
require.False(t, boolField(raw, "missing"))
|
||||
require.Equal(t, 2, lenArray(raw["array"]))
|
||||
require.Zero(t, lenArray(raw["string"]))
|
||||
require.Equal(t, "fallback", firstNonEmpty("", " ", "fallback", "later"))
|
||||
require.Empty(t, firstNonEmpty("", " "))
|
||||
}
|
||||
|
||||
func TestDiscordValueFormatHelpers(t *testing.T) {
|
||||
require.Equal(t, "456789", shortID("123456789"))
|
||||
require.Equal(t, "short", shortID("short"))
|
||||
require.Equal(t, "Discord Direct Messages", guildName(DirectMessageGuildID))
|
||||
require.Equal(t, "Discord Desktop Guild 123456", guildName("123456"))
|
||||
|
||||
require.Equal(t, "dm", kindForChannelType(1, true))
|
||||
require.Equal(t, "group_dm", kindForChannelType(3, true))
|
||||
require.Equal(t, "thread_public", kindForChannelType(11, false))
|
||||
require.Equal(t, "thread_private", kindForChannelType(12, false))
|
||||
require.Equal(t, "thread_announcement", kindForChannelType(10, false))
|
||||
require.Equal(t, "desktop", kindForChannelType(2, false))
|
||||
require.Equal(t, "desktop", kindForChannelType(4, false))
|
||||
require.Equal(t, "announcement", kindForChannelType(5, false))
|
||||
require.Equal(t, "forum", kindForChannelType(15, false))
|
||||
require.Equal(t, "desktop", kindForChannelType(16, false))
|
||||
require.Equal(t, "text", kindForChannelType(0, false))
|
||||
}
|
||||
|
||||
func TestDiscordMessagePayloadHelpers(t *testing.T) {
|
||||
raw := map[string]any{
|
||||
"id": "333333333333333333",
|
||||
"channel_id": "111111111111111111",
|
||||
"guild_id": "999999999999999999",
|
||||
"type": float64(0),
|
||||
"timestamp": "2026-05-08T12:00:00Z",
|
||||
"edited_timestamp": "2026-05-08T12:05:00Z",
|
||||
"content": "hello\u200b\nworld",
|
||||
"message_reference": map[string]any{"message_id": "222222222222222222"},
|
||||
"author": map[string]any{
|
||||
"id": "444444444444444444",
|
||||
"username": "peter",
|
||||
"global_name": "Peter",
|
||||
"display_name": "Peter S",
|
||||
"discriminator": "0",
|
||||
"bot": true,
|
||||
},
|
||||
"attachments": []any{
|
||||
map[string]any{"filename": "trace.txt", "content_type": "text/plain", "size": float64(12), "url": "https://cdn.example/trace.txt"},
|
||||
map[string]any{"id": "att2"},
|
||||
"ignored",
|
||||
},
|
||||
"mentions": []any{
|
||||
map[string]any{"id": "555555555555555555", "username": "alice", "global_name": "Alice"},
|
||||
map[string]any{"username": "missing"},
|
||||
},
|
||||
"embeds": []any{
|
||||
map[string]any{"title": "Deploy", "description": "Ready"},
|
||||
map[string]any{"title": " "},
|
||||
},
|
||||
}
|
||||
at := parseDiscordTime("2026-05-08T12:00:00Z")
|
||||
attachments := parseAttachments(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444")
|
||||
require.Len(t, attachments, 2)
|
||||
require.Equal(t, "333333333333333333:0", attachments[0].AttachmentID)
|
||||
require.Equal(t, "trace.txt", attachments[0].Filename)
|
||||
require.Equal(t, "att2", attachments[1].Filename)
|
||||
require.Equal(t, []string{"trace.txt", "att2"}, attachmentText(attachments))
|
||||
|
||||
mentions := parseMentions(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444", at)
|
||||
require.Equal(t, []store.MentionEventRecord{{
|
||||
MessageID: "333333333333333333",
|
||||
GuildID: "999999999999999999",
|
||||
ChannelID: "111111111111111111",
|
||||
AuthorID: "444444444444444444",
|
||||
TargetType: "user",
|
||||
TargetID: "555555555555555555",
|
||||
TargetName: "Alice",
|
||||
EventAt: at.Format(time.RFC3339Nano),
|
||||
}}, mentions)
|
||||
|
||||
require.Equal(t, []string{"Deploy", "Ready"}, embedText(raw))
|
||||
require.Equal(t, "helloworld\ntrace.txt\natt2\nDeploy\nReady", normalizeText(raw["content"], attachmentText(attachments), embedText(raw)))
|
||||
require.Equal(t, "hidden text", cleanText("\u200bhidden\x00 text\n"))
|
||||
require.Equal(t, "222222222222222222", messageReferenceID(raw))
|
||||
require.Empty(t, messageReferenceID(map[string]any{}))
|
||||
|
||||
require.Contains(t, syntheticGuild("g1", "Guild").RawJSON, "discord_desktop")
|
||||
require.Equal(t, "dm", syntheticChannel("c1", DirectMessageGuildID, "Alice").Kind)
|
||||
require.Equal(t, "group_dm", syntheticChannel("c2", DirectMessageGuildID, "Alice, Bob").Kind)
|
||||
require.Equal(t, "channel-123456", syntheticChannel("123456123456", "g1", "").Name)
|
||||
require.Contains(t, channelRawJSON(raw, "c1", "g1", "general", "text"), `"kind":"text"`)
|
||||
require.Contains(t, messageRawJSON(raw, "333333333333333333", "999999999999999999", "111111111111111111", "444444444444444444"), "desktop_cache_note")
|
||||
require.Equal(t, "Alice, Bob", recipientLabel([]any{
|
||||
map[string]any{"username": "Bob"},
|
||||
map[string]any{"global_name": "Alice"},
|
||||
map[string]any{},
|
||||
}))
|
||||
|
||||
require.True(t, parseDiscordTime("2026-05-08T12:00:00.123Z").Equal(time.Date(2026, 5, 8, 12, 0, 0, 123000000, time.UTC)))
|
||||
require.True(t, parseDiscordTime("bad").IsZero())
|
||||
require.True(t, parseDiscordTime("").IsZero())
|
||||
require.False(t, snowflakeTime("175928847299117063").IsZero())
|
||||
require.True(t, snowflakeTime("bad").IsZero())
|
||||
require.Empty(t, formatOptionalTime(time.Time{}))
|
||||
require.Equal(t, "2026-05-08T12:00:00Z", formatOptionalTime(at))
|
||||
require.True(t, looksSnowflake("123456789012"))
|
||||
require.False(t, looksSnowflake("123"))
|
||||
require.False(t, looksSnowflake("12345678901x"))
|
||||
}
|
||||
91
internal/embed/ollama.go
Normal file
91
internal/embed/ollama.go
Normal file
@ -0,0 +1,91 @@
|
||||
package embed
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// ollamaProvider holds the settings used to request embeddings from an
// Ollama server's /api/embed endpoint.
type ollamaProvider struct {
	// client performs the HTTP requests.
	client *http.Client
	// baseURL is prefixed to "/api/embed"; model is sent as the request's
	// "model" field and used as the fallback model name in results.
	baseURL, model string
	// maxInputChars is handed to trimInputs before inputs are sent.
	maxInputChars int
}
|
||||
|
||||
// ollamaEmbedRequest is the JSON body posted to the Ollama embed endpoint.
type ollamaEmbedRequest struct {
	// Model is serialized as "model".
	Model string `json:"model"`
	// Input is serialized as "input", one entry per text to embed.
	Input []string `json:"input"`
}
|
||||
|
||||
// ollamaEmbedResponse is the JSON body decoded from the Ollama embed endpoint.
type ollamaEmbedResponse struct {
	// Model is read from "model"; it may be empty.
	Model string `json:"model"`
	// Embeddings is read from "embeddings", one vector per input.
	Embeddings [][]float32 `json:"embeddings"`
}
|
||||
|
||||
func newOllamaProvider(settings providerSettings) Provider {
|
||||
return &ollamaProvider{
|
||||
client: settings.HTTPClient,
|
||||
baseURL: settings.BaseURL,
|
||||
model: settings.Model,
|
||||
maxInputChars: settings.MaxInputChars,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *ollamaProvider) Embed(ctx context.Context, inputs []string) (EmbeddingBatch, error) {
|
||||
if len(inputs) == 0 {
|
||||
return EmbeddingBatch{Model: p.model}, nil
|
||||
}
|
||||
payload := ollamaEmbedRequest{
|
||||
Model: p.model,
|
||||
Input: trimInputs(inputs, p.maxInputChars),
|
||||
}
|
||||
var response ollamaEmbedResponse
|
||||
if err := postJSON(ctx, p.client, p.baseURL+"/api/embed", "", payload, &response); err != nil {
|
||||
return EmbeddingBatch{}, err
|
||||
}
|
||||
if len(response.Embeddings) != len(inputs) {
|
||||
return EmbeddingBatch{}, fmt.Errorf("ollama embedding response returned %d vectors for %d inputs", len(response.Embeddings), len(inputs))
|
||||
}
|
||||
dimensions, err := inferDimensions(response.Embeddings)
|
||||
if err != nil {
|
||||
return EmbeddingBatch{}, err
|
||||
}
|
||||
model := response.Model
|
||||
if model == "" {
|
||||
model = p.model
|
||||
}
|
||||
return EmbeddingBatch{Model: model, Dimensions: dimensions, Vectors: response.Embeddings}, nil
|
||||
}
|
||||
|
||||
func postJSON(ctx context.Context, client *http.Client, endpoint, apiKey string, payload any, target any) error {
|
||||
body, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal embedding request: %w", err)
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(body))
|
||||
if err != nil {
|
||||
return fmt.Errorf("build embedding request: %w", err)
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
if apiKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+apiKey)
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("embedding request failed: %w", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
return &HTTPError{StatusCode: resp.StatusCode, Body: string(msg)}
|
||||
}
|
||||
if err := json.NewDecoder(resp.Body).Decode(target); err != nil {
|
||||
return fmt.Errorf("decode embedding response: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
82
internal/embed/openai_compatible.go
Normal file
82
internal/embed/openai_compatible.go
Normal file
@ -0,0 +1,82 @@
|
||||
package embed
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// openAICompatibleProvider holds the settings used to request embeddings from
// a server exposing an OpenAI-style "/embeddings" endpoint.
type openAICompatibleProvider struct {
	// client performs the HTTP requests.
	client *http.Client
	// baseURL is prefixed to "/embeddings"; apiKey is passed to postJSON as
	// the bearer token; model is sent as the request's "model" field and used
	// as the fallback model name in results.
	baseURL, apiKey, model string
	// maxInputChars is handed to trimInputs before inputs are sent.
	maxInputChars int
}
|
||||
|
||||
// openAIEmbeddingRequest is the JSON body posted to the "/embeddings" endpoint.
type openAIEmbeddingRequest struct {
	// Model is serialized as "model".
	Model string `json:"model"`
	// Input is serialized as "input", one entry per text to embed.
	Input []string `json:"input"`
}
|
||||
|
||||
type openAIEmbeddingResponse struct {
|
||||
Model string `json:"model"`
|
||||
Data []openAIEmbeddingItem `json:"data"`
|
||||
}
|
||||
|
||||
// openAIEmbeddingItem is one element of an embeddings response's "data" array.
type openAIEmbeddingItem struct {
	// Index is read from "index". It is a pointer so that an absent field
	// (nil) can be told apart from an explicit 0.
	Index *int `json:"index"`
	// Embedding is read from "embedding".
	Embedding []float32 `json:"embedding"`
}
|
||||
|
||||
func newOpenAICompatibleProvider(settings providerSettings) Provider {
|
||||
return &openAICompatibleProvider{
|
||||
client: settings.HTTPClient,
|
||||
baseURL: settings.BaseURL,
|
||||
apiKey: settings.APIKey,
|
||||
model: settings.Model,
|
||||
maxInputChars: settings.MaxInputChars,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *openAICompatibleProvider) Embed(ctx context.Context, inputs []string) (EmbeddingBatch, error) {
|
||||
if len(inputs) == 0 {
|
||||
return EmbeddingBatch{Model: p.model}, nil
|
||||
}
|
||||
payload := openAIEmbeddingRequest{
|
||||
Model: p.model,
|
||||
Input: trimInputs(inputs, p.maxInputChars),
|
||||
}
|
||||
var response openAIEmbeddingResponse
|
||||
if err := postJSON(ctx, p.client, p.baseURL+"/embeddings", p.apiKey, payload, &response); err != nil {
|
||||
return EmbeddingBatch{}, err
|
||||
}
|
||||
if len(response.Data) != len(inputs) {
|
||||
return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response returned %d vectors for %d inputs", len(response.Data), len(inputs))
|
||||
}
|
||||
vectors := make([][]float32, len(inputs))
|
||||
seen := make([]bool, len(inputs))
|
||||
for position, item := range response.Data {
|
||||
index := position
|
||||
if item.Index != nil {
|
||||
index = *item.Index
|
||||
}
|
||||
if index < 0 || index >= len(inputs) {
|
||||
return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response index %d out of range", index)
|
||||
}
|
||||
if seen[index] {
|
||||
return EmbeddingBatch{}, fmt.Errorf("openai-compatible embedding response duplicated index %d", index)
|
||||
}
|
||||
seen[index] = true
|
||||
vectors[index] = item.Embedding
|
||||
}
|
||||
dimensions, err := inferDimensions(vectors)
|
||||
if err != nil {
|
||||
return EmbeddingBatch{}, err
|
||||
}
|
||||
model := response.Model
|
||||
if model == "" {
|
||||
model = p.model
|
||||
}
|
||||
return EmbeddingBatch{Model: model, Dimensions: dimensions, Vectors: vectors}, nil
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user