fix(share): delta import git snapshots
This commit is contained in:
parent
335a95bd66
commit
67c6f4655b
53
CHANGELOG.md
53
CHANGELOG.md
@ -4,22 +4,35 @@
|
|||||||
|
|
||||||
### Changes
|
### Changes
|
||||||
|
|
||||||
- Document the crawlkit-backed config/status/control, snapshot, mirror,
|
- Added `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared crawlkit pane browser.
|
||||||
sync-state, output, and shared TUI surfaces now used on `main`.
|
- Added crawlkit-backed `metadata --json`, `status --json`, and `doctor --json` control surfaces for launchers, automation, and CI checks.
|
||||||
- Clarify that Discord bot sync, desktop wiretap parsing, DM privacy filters,
|
- Published the generated documentation site at `discrawl.sh`, including command pages, install/setup docs, configuration, security notes, guides, a contact page, and social cards.
|
||||||
schema ownership, FTS/ranking, embeddings, and analytics remain app-owned.
|
- Moved the Go module and release metadata to `github.com/openclaw/discrawl`.
|
||||||
- Align terminal browser docs with the gitcrawl-style shared TUI model:
|
|
||||||
channel/person/thread groups, message rows, detail/thread panes, sorting,
|
### Fixes
|
||||||
mouse selection, right-click actions, and local/remote status chrome.
|
|
||||||
|
- Made the terminal browser more useful and accurate: default guild scoping, newest-message startup, compact panes, selected-message detail panes, count-header sorting, local/remote status labels, right-click actions, Discord message URLs, row labels, direct-message pane labels, mention rendering, inline mention resolution, attachment details, and reply-context hydration without broad thread scans.
|
||||||
|
- Kept `tui --help`, status, and terminal-browser reads safe for fresh or missing local databases without triggering Git snapshot auto-update.
|
||||||
|
- Kept local-only snapshot rows filtered during shared archive imports and forwarded snapshot import progress through the crawlkit import path.
|
||||||
|
- Made stale Git snapshot imports plan shard deltas from crawlkit file fingerprints or Git object identity, so routine shared-archive refreshes import changed message tail shards instead of rebuilding every table and FTS index.
|
||||||
|
- Included progress percentages in message-sync logs.
|
||||||
|
- Fixed GoReleaser version stamping after the module path move.
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
- Documented the crawlkit-backed config/status/control, snapshot, mirror, sync-state, output, and shared TUI surfaces now used on `main`.
|
||||||
|
- Clarified that Discord bot sync, desktop wiretap parsing, DM privacy filters, schema ownership, FTS/ranking, embeddings, and analytics remain app-owned.
|
||||||
|
- Aligned terminal-browser docs with the gitcrawl-style shared TUI model: channel/person/thread groups, message rows, detail/thread panes, sorting, mouse selection, right-click actions, and local/remote status chrome.
|
||||||
|
- Refreshed the repo-local `discrawl` agent skill for local Discord archive, freshness, query, boundary, TUI, verification, and read-only SQL workflows.
|
||||||
|
|
||||||
### Maintenance
|
### Maintenance
|
||||||
|
|
||||||
- Document the read-only `metadata --json`, `status --json`, and
|
- Migrated runtime paths, SQLite opening, archive mirror/export/import helpers, output/status wiring, and TUI plumbing onto the shared `crawlkit` infrastructure.
|
||||||
`doctor --json` control surface for launchers, automation, and CI checks.
|
- Updated crawlkit through `v0.4.2`, switched imports to `github.com/openclaw/crawlkit`, and added CI smoke coverage for the crawlkit control surface and merge behavior.
|
||||||
- Refresh the repo-local `discrawl` agent skill for local Discord archive,
|
- Added CodeQL, verified secret scanning, protected automation owners, stale issue automation, `.editorconfig`, and `.gitattributes`.
|
||||||
freshness, query, boundary, TUI, and verification workflows.
|
- Added release workflow automation that dispatches the Homebrew tap formula update after GoReleaser publishes a tag.
|
||||||
- Document `discrawl sql` read-only query examples in the repo-local agent
|
|
||||||
skill so agents can do exact archive counts and rankings safely.
|
## 0.6.6 - 2026-05-05
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
@ -45,24 +58,10 @@
|
|||||||
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
|
- Refreshed dependency and CI tooling pins, including GoReleaser, `go-toml`, golangci-lint, and gosec.
|
||||||
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
|
- Tightened CI compatibility with the latest linters and made signal-cancellation and sync fixture tests deterministic under the race detector.
|
||||||
|
|
||||||
### Fixes
|
|
||||||
|
|
||||||
- Label direct-message TUI panes as direct messages instead of raw `@me` guild rows, keeping DM channel/person context readable.
|
|
||||||
- Inherit shared crawlkit TUI improvements for newest-first startup, count-header sorting, selected-message-first chat detail panes, and gitcrawl-style metadata labels.
|
|
||||||
- Surface Discord attachment filenames and extracted text in TUI detail panes instead of only showing `attachments=true`.
|
|
||||||
|
|
||||||
## 0.6.3 - 2026-05-01
|
## 0.6.3 - 2026-05-01
|
||||||
|
|
||||||
### Changes
|
|
||||||
|
|
||||||
- Add crawlkit control metadata/status surfaces with `metadata --json`, `status --json`, and `doctor --json`.
|
|
||||||
- Add `tap` and `cache-import` as public desktop-cache import names while keeping `wiretap` as a documented legacy alias.
|
|
||||||
- Add `discrawl tui`, a terminal archive browser for stored guild messages and local `@me` wiretap DMs using the shared `crawlkit/tui` package.
|
|
||||||
- Render TUI rows with compact panes and expose pinned, attachment, reply, channel, and author metadata in the detail pane.
|
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
- Keep status and TUI reads safe for fresh or missing local databases without triggering git-share auto-update.
|
|
||||||
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
|
- Added OS keyring fallback for Discord bot-token resolution, keeping env as the first source and documenting the default keyring item. (#17)
|
||||||
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.
|
- Clarified and locked down FTS query normalization so operator-like search terms such as `AND`, `OR`, `NOT`, `NEAR`, and `*` stay parameterized and quoted before SQLite `MATCH`. Thanks @mvanhorn.
|
||||||
|
|
||||||
|
|||||||
@ -177,7 +177,9 @@ The terminal browser uses the shared crawlkit explorer. The left pane groups
|
|||||||
channels, people, or threads; the middle pane lists messages; the right pane
|
channels, people, or threads; the middle pane lists messages; the right pane
|
||||||
shows the selected message, surrounding conversation, and thread detail. Mouse
|
shows the selected message, surrounding conversation, and thread detail. Mouse
|
||||||
selection, right-click actions, sortable headers, and the local/remote footer
|
selection, right-click actions, sortable headers, and the local/remote footer
|
||||||
follow the same interaction model as `gitcrawl tui`.
|
follow the same interaction model as `gitcrawl tui`. See
|
||||||
|
[`docs/commands/tui.md`](docs/commands/tui.md) for flags and read-only/DM scope
|
||||||
|
notes.
|
||||||
|
|
||||||
### `init`
|
### `init`
|
||||||
|
|
||||||
@ -247,6 +249,7 @@ When `--channels` includes a forum channel id, `discrawl` expands that forum's t
|
|||||||
Long runs now emit periodic progress logs to stderr so large backfills and Git snapshot imports do not look hung.
|
Long runs now emit periodic progress logs to stderr so large backfills and Git snapshot imports do not look hung.
|
||||||
If in-flight channels stop completing for a while, `discrawl` now emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters, and every run ends with a `message sync finished` summary.
|
If in-flight channels stop completing for a while, `discrawl` now emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters, and every run ends with a `message sync finished` summary.
|
||||||
Each channel crawl also has a bounded runtime budget, so a pathological channel is deferred and retried on the next sync instead of pinning a worker forever.
|
Each channel crawl also has a bounded runtime budget, so a pathological channel is deferred and retried on the next sync instead of pinning a worker forever.
|
||||||
|
Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl, and marker cleanup is best-effort so one missing local sync-state row cannot crash the run.
|
||||||
Full sync member refresh is best-effort and currently gives up after five minutes without a caller-supplied deadline, so message sync completion is not held hostage by a slow guild member crawl.
|
Full sync member refresh is best-effort and currently gives up after five minutes without a caller-supplied deadline, so message sync completion is not held hostage by a slow guild member crawl.
|
||||||
When the archive is already complete, `sync --full` now reuses the stored backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
When the archive is already complete, `sync --full` now reuses the stored backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
||||||
If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
||||||
@ -482,9 +485,9 @@ discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.
|
|||||||
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
||||||
```
|
```
|
||||||
|
|
||||||
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). `discrawl update` forces the same pull/import step manually. `discrawl sync` does not auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
Once `share.remote` is configured, read commands auto-fetch and import when the local share import is older than `share.stale_after` (default `15m`). Imports are planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests, so routine updates normally read only changed tail shards and preserve local FTS rows instead of rebuilding the whole archive. `discrawl update` forces the same pull/import step manually. `discrawl sync` does not auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
||||||
|
|
||||||
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync --update=auto` and `discrawl messages --sync` import the Git snapshot first, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
|
Hybrid mode is supported too: keep normal Discord credentials configured and set `share.remote`. `discrawl sync --update=auto` and `discrawl messages --sync` import the Git snapshot first, usually as a changed-shard delta, then use live Discord for latest-message deltas. Use `sync --all-channels` or `sync --full` when you intentionally want a broader live repair/backfill pass.
|
||||||
|
|
||||||
Git snapshots publish non-DM archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
|
Git snapshots publish non-DM archive tables by default. Embedding queue state stays local to each machine, and Git-only readers can use FTS immediately without an embedding provider.
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,7 @@ By default, `sync` runs both live/local sources and does **not** import the Git
|
|||||||
- Discord bot-token sync for bot-visible guild data
|
- Discord bot-token sync for bot-visible guild data
|
||||||
- local Discord Desktop cache import for classifiable cached messages and proven DMs
|
- local Discord Desktop cache import for classifiable cached messages and proven DMs
|
||||||
|
|
||||||
Use [`update`](update.html) when you want to pull/import the shared Git snapshot. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` (only when stale) or `--update=force` (always). `--no-update` is accepted as an explicit no-op alias for the default.
|
Use [`update`](update.html) when you want to pull/import the shared Git snapshot. Snapshot imports normally use changed-shard deltas, but unsafe table changes fall back to a full import. If you intentionally want a sync run to import the snapshot before live deltas, pass `--update=auto` (only when stale) or `--update=force` (always). `--no-update` is accepted as an explicit no-op alias for the default.
|
||||||
|
|
||||||
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
|
Run one explicit `--full` pass when you want a complete historical guild archive. Use plain `sync` afterward for frequent latest-message and desktop-cache refreshes.
|
||||||
|
|
||||||
@ -70,6 +70,8 @@ discrawl sync --with-embeddings
|
|||||||
- Heartbeat logs (`message sync waiting`) name the oldest active channel and per-channel page activity if in-flight channels stop completing for a while.
|
- Heartbeat logs (`message sync waiting`) name the oldest active channel and per-channel page activity if in-flight channels stop completing for a while.
|
||||||
- Every run ends with a `message sync finished` summary.
|
- Every run ends with a `message sync finished` summary.
|
||||||
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried next sync.
|
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried next sync.
|
||||||
|
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
|
||||||
|
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
|
||||||
- Full sync member refresh is best-effort and gives up after five minutes without a caller-supplied deadline.
|
- Full sync member refresh is best-effort and gives up after five minutes without a caller-supplied deadline.
|
||||||
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads.
|
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads.
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
Forces a Git snapshot pull and import.
|
Forces a Git snapshot pull and import.
|
||||||
|
|
||||||
|
Routine imports are delta-planned from crawlkit shard fingerprints, with a Git-object fallback for older manifests. After a typical publish, only the changed tail shards are imported; unsafe table changes fall back to a full import.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -19,7 +21,7 @@ discrawl update --with-embeddings
|
|||||||
|
|
||||||
## When to use it
|
## When to use it
|
||||||
|
|
||||||
- you have `share.remote` configured and want a fresh import before running a command that does not auto-update (`sync` does not auto-import unless `--update=auto` is passed)
|
- you have `share.remote` configured and want a fresh shard-delta import before running a command that does not auto-update (`sync` does not auto-import unless `--update=auto` or `--update=force` is passed)
|
||||||
- you set `--no-auto-update` when subscribing and want to refresh on demand
|
- you set `--no-auto-update` when subscribing and want to refresh on demand
|
||||||
- a CI job already imported the latest snapshot but read commands still consider it stale
|
- a CI job already imported the latest snapshot but read commands still consider it stale
|
||||||
|
|
||||||
|
|||||||
@ -35,7 +35,7 @@ discrawl subscribe --stale-after 15m https://github.com/example/discord-archive.
|
|||||||
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
discrawl subscribe --no-auto-update https://github.com/example/discord-archive.git
|
||||||
```
|
```
|
||||||
|
|
||||||
`discrawl update` forces the same pull/import step manually.
|
`discrawl update` forces the same pull/import step manually. Snapshot imports are delta-planned from crawlkit shard fingerprints; older manifests without fingerprint fields fall back to Git blob identity. As a result, a typical publish imports only the changed message tail shard plus small cursor tables. Unsafe table-shape changes still fall back to a full import.
|
||||||
|
|
||||||
`discrawl sync` does **not** auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
`discrawl sync` does **not** auto-import the share unless `--update=auto` or `--update=force` is provided, so routine live refreshes stay fast.
|
||||||
|
|
||||||
@ -44,7 +44,7 @@ discrawl subscribe --no-auto-update https://github.com/example/discord-archive.g
|
|||||||
Keep normal Discord credentials configured **and** set `share.remote`:
|
Keep normal Discord credentials configured **and** set `share.remote`:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
discrawl sync --update=auto # import snapshot first, then live deltas
|
discrawl sync --update=auto # import snapshot delta first, then live deltas
|
||||||
discrawl messages --sync # blocking pre-query sync for matched scope
|
discrawl messages --sync # blocking pre-query sync for matched scope
|
||||||
discrawl sync --all-channels # broader live repair
|
discrawl sync --all-channels # broader live repair
|
||||||
discrawl sync --full # historical backfill
|
discrawl sync --full # historical backfill
|
||||||
|
|||||||
@ -19,7 +19,7 @@ Sync modes control the Discord bot API side of a run. When `wiretap` is selected
|
|||||||
| Command | Use when | Behavior |
|
| Command | Use when | Behavior |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored latest cursor |
|
| `discrawl sync` | routine refresh | skips member refreshes, checks live top-level channels plus active threads, only fetches new messages for channels with a stored latest cursor |
|
||||||
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, then runs the routine live refresh |
|
| `discrawl sync --update=auto` | hybrid Git/live refresh | imports a stale Git snapshot first, usually as a changed-shard delta, then runs the routine live refresh |
|
||||||
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
|
| `discrawl sync --all-channels` | repair pass | broad incremental sweep across every stored channel/thread, including archived threads |
|
||||||
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
|
| `discrawl sync --full` | historical backfill | crawls older history until channels are complete; can take a long time on large servers |
|
||||||
|
|
||||||
@ -43,6 +43,8 @@ Run one explicit `--full` pass when you want a complete historical guild archive
|
|||||||
- If in-flight channels stop completing for a while, `discrawl` emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters.
|
- If in-flight channels stop completing for a while, `discrawl` emits `message sync waiting` heartbeat logs with the oldest active channel, per-channel page activity, and skip/defer counters.
|
||||||
- Every run ends with a `message sync finished` summary.
|
- Every run ends with a `message sync finished` summary.
|
||||||
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried on the next sync.
|
- Each channel crawl has a bounded runtime budget; pathological channels are deferred and retried on the next sync.
|
||||||
|
- Retryable failures and unavailable-channel markers are tracked per channel; stale unavailable markers are cleared after a later successful crawl.
|
||||||
|
- Marker cleanup is best-effort, so one missing local sync-state row cannot crash the run.
|
||||||
- Full sync member refresh is best-effort and gives up after five minutes without a caller-supplied deadline, so message sync completion is not held hostage by a slow guild member crawl.
|
- Full sync member refresh is best-effort and gives up after five minutes without a caller-supplied deadline, so message sync completion is not held hostage by a slow guild member crawl.
|
||||||
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
- When the archive is already complete, `sync --full` reuses backlog markers and limits steady-state refresh to live top-level channels plus active threads instead of revisiting every stored archived thread.
|
||||||
- If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
- If a guild already has a local member snapshot, routine syncs reuse it and skip another full member crawl until that snapshot ages out.
|
||||||
|
|||||||
13
go.mod
13
go.mod
@ -13,9 +13,8 @@ require (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/charmbracelet/bubbles v1.0.0 // indirect
|
github.com/charmbracelet/bubbles v1.0.0 // indirect
|
||||||
github.com/clipperhouse/displaywidth v0.9.0 // indirect
|
github.com/clipperhouse/displaywidth v0.11.0 // indirect
|
||||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
github.com/clipperhouse/uax29/v2 v2.7.0 // indirect
|
||||||
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
|
|
||||||
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
|
github.com/pelletier/go-toml/v2 v2.3.1 // indirect
|
||||||
modernc.org/sqlite v1.50.0 // indirect
|
modernc.org/sqlite v1.50.0 // indirect
|
||||||
)
|
)
|
||||||
@ -25,7 +24,7 @@ require (
|
|||||||
github.com/charmbracelet/bubbletea v1.3.10 // indirect
|
github.com/charmbracelet/bubbletea v1.3.10 // indirect
|
||||||
github.com/charmbracelet/colorprofile v0.4.1 // indirect
|
github.com/charmbracelet/colorprofile v0.4.1 // indirect
|
||||||
github.com/charmbracelet/lipgloss v1.1.0 // indirect
|
github.com/charmbracelet/lipgloss v1.1.0 // indirect
|
||||||
github.com/charmbracelet/x/ansi v0.11.6 // indirect
|
github.com/charmbracelet/x/ansi v0.11.7 // indirect
|
||||||
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
|
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
|
||||||
github.com/charmbracelet/x/term v0.2.2 // indirect
|
github.com/charmbracelet/x/term v0.2.2 // indirect
|
||||||
github.com/danieljoos/wincred v1.2.3 // indirect
|
github.com/danieljoos/wincred v1.2.3 // indirect
|
||||||
@ -36,18 +35,18 @@ require (
|
|||||||
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
|
github.com/google/pprof v0.0.0-20260402051712-545e8a4df936 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/kr/pretty v0.3.1 // indirect
|
github.com/kr/pretty v0.3.1 // indirect
|
||||||
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
|
github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
|
||||||
github.com/mattn/go-isatty v0.0.22 // indirect
|
github.com/mattn/go-isatty v0.0.22 // indirect
|
||||||
github.com/mattn/go-localereader v0.0.1 // indirect
|
github.com/mattn/go-localereader v0.0.1 // indirect
|
||||||
github.com/mattn/go-runewidth v0.0.19 // indirect
|
github.com/mattn/go-runewidth v0.0.23 // indirect
|
||||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
|
||||||
github.com/muesli/cancelreader v0.2.2 // indirect
|
github.com/muesli/cancelreader v0.2.2 // indirect
|
||||||
github.com/muesli/termenv v0.16.0 // indirect
|
github.com/muesli/termenv v0.16.0 // indirect
|
||||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||||
|
github.com/openclaw/crawlkit v0.4.2
|
||||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
github.com/rivo/uniseg v0.4.7 // indirect
|
github.com/rivo/uniseg v0.4.7 // indirect
|
||||||
github.com/vincentkoc/crawlkit v0.4.1
|
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
|
||||||
golang.org/x/crypto v0.50.0 // indirect
|
golang.org/x/crypto v0.50.0 // indirect
|
||||||
golang.org/x/tools v0.44.0 // indirect
|
golang.org/x/tools v0.44.0 // indirect
|
||||||
|
|||||||
26
go.sum
26
go.sum
@ -10,18 +10,16 @@ github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco
|
|||||||
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
|
github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk=
|
||||||
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
|
github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY=
|
||||||
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
|
github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30=
|
||||||
github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
|
github.com/charmbracelet/x/ansi v0.11.7 h1:kzv1kJvjg2S3r9KHo8hDdHFQLEqn4RBCb39dAYC84jI=
|
||||||
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
|
github.com/charmbracelet/x/ansi v0.11.7/go.mod h1:9qGpnAVYz+8ACONkZBUWPtL7lulP9No6p1epAihUZwQ=
|
||||||
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
|
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
|
||||||
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
|
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
|
||||||
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
|
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
|
||||||
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
|
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
|
||||||
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
|
github.com/clipperhouse/displaywidth v0.11.0 h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8=
|
||||||
github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
|
github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3JhNl3dSqnGhOoSD/o0=
|
||||||
github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
|
github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk=
|
||||||
github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
|
github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
|
||||||
github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U=
|
|
||||||
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
|
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
|
github.com/danieljoos/wincred v1.2.3 h1:v7dZC2x32Ut3nEfRH+vhoZGvN72+dQ/snVXo/vMFLdQ=
|
||||||
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
|
github.com/danieljoos/wincred v1.2.3/go.mod h1:6qqX0WNrS4RzPZ1tnroDzq9kY3fu1KwE7MRLQK4X0bs=
|
||||||
@ -49,14 +47,14 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
|||||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||||
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
|
github.com/lucasb-eyer/go-colorful v1.4.0 h1:UtrWVfLdarDgc44HcS7pYloGHJUjHV/4FwW4TvVgFr4=
|
||||||
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
github.com/lucasb-eyer/go-colorful v1.4.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
|
||||||
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
|
github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4=
|
||||||
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
|
github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4=
|
||||||
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
|
||||||
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
|
||||||
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
|
github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw=
|
||||||
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
|
github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
|
||||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
|
||||||
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo=
|
||||||
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
|
||||||
@ -65,6 +63,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc
|
|||||||
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
|
||||||
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
|
||||||
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||||
|
github.com/openclaw/crawlkit v0.4.2 h1:Lzzkd2/xSkQk+7KyboMEw+ZS2wmlYvDFLwAB2Z/FwBs=
|
||||||
|
github.com/openclaw/crawlkit v0.4.2/go.mod h1:/AI8o/DeRqXPZJPHq/9mGUjNzLPskm/wTjikRPxEdHY=
|
||||||
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
|
github.com/pelletier/go-toml/v2 v2.3.1 h1:MYEvvGnQjeNkRF1qUuGolNtNExTDwct51yp7olPtrEc=
|
||||||
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
github.com/pelletier/go-toml/v2 v2.3.1/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||||
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
|
||||||
@ -80,8 +80,6 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
|||||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
github.com/vincentkoc/crawlkit v0.4.1 h1:qDUF+Kk7nqADmpGMcnWTHEQMiX3bSD2DdFywKyT3kWs=
|
|
||||||
github.com/vincentkoc/crawlkit v0.4.1/go.mod h1:/ioLA/tyZ/927kAOGg0M8Mrqk7pnTZLpCKWfpul9zoE=
|
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
|
||||||
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
|
||||||
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
|
github.com/zalando/go-keyring v0.2.8 h1:6sD/Ucpl7jNq10rM2pgqTs0sZ9V3qMrqfIIy5YPccHs=
|
||||||
|
|||||||
@ -8,9 +8,9 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/openclaw/crawlkit/control"
|
||||||
"github.com/openclaw/discrawl/internal/config"
|
"github.com/openclaw/discrawl/internal/config"
|
||||||
"github.com/openclaw/discrawl/internal/store"
|
"github.com/openclaw/discrawl/internal/store"
|
||||||
"github.com/vincentkoc/crawlkit/control"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *runtime) runMetadata(args []string) error {
|
func (r *runtime) runMetadata(args []string) error {
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/vincentkoc/crawlkit/tui"
|
"github.com/openclaw/crawlkit/tui"
|
||||||
|
|
||||||
"github.com/openclaw/discrawl/internal/store"
|
"github.com/openclaw/discrawl/internal/store"
|
||||||
)
|
)
|
||||||
|
|||||||
@ -9,7 +9,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
crawlconfig "github.com/vincentkoc/crawlkit/config"
|
crawlconfig "github.com/openclaw/crawlkit/config"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@ -8,23 +8,26 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"hash/fnv"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/openclaw/crawlkit/mirror"
|
||||||
|
"github.com/openclaw/crawlkit/snapshot"
|
||||||
"github.com/openclaw/discrawl/internal/store"
|
"github.com/openclaw/discrawl/internal/store"
|
||||||
"github.com/vincentkoc/crawlkit/mirror"
|
|
||||||
"github.com/vincentkoc/crawlkit/snapshot"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ManifestName = "manifest.json"
|
ManifestName = "manifest.json"
|
||||||
LastImportSyncScope = "share:last_import_at"
|
LastImportSyncScope = "share:last_import_at"
|
||||||
LastImportManifestSyncScope = "share:last_import_manifest_generated_at"
|
LastImportManifestSyncScope = "share:last_import_manifest_generated_at"
|
||||||
|
LastImportManifestJSONScope = "share:last_import_manifest_json"
|
||||||
directMessageGuildID = "@me"
|
directMessageGuildID = "@me"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -159,6 +162,7 @@ func Import(ctx context.Context, s *store.Store, opts Options) (Manifest, error)
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Manifest{}, err
|
return Manifest{}, err
|
||||||
}
|
}
|
||||||
|
manifest = enrichManifestFromGit(ctx, opts.RepoPath, "HEAD", manifest)
|
||||||
opts.reportProgress(ImportProgress{Phase: "start", TotalRows: manifestRowCount(manifest)})
|
opts.reportProgress(ImportProgress{Phase: "start", TotalRows: manifestRowCount(manifest)})
|
||||||
restorePragmas, err := applyImportPragmas(ctx, s.DB())
|
restorePragmas, err := applyImportPragmas(ctx, s.DB())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -262,6 +266,7 @@ func ImportIfChanged(ctx context.Context, s *store.Store, opts Options) (Manifes
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Manifest{}, false, err
|
return Manifest{}, false, err
|
||||||
}
|
}
|
||||||
|
manifest = enrichManifestFromGit(ctx, opts.RepoPath, "HEAD", manifest)
|
||||||
if ManifestAlreadyImported(ctx, s, manifest) {
|
if ManifestAlreadyImported(ctx, s, manifest) {
|
||||||
if opts.IncludeEmbeddings {
|
if opts.IncludeEmbeddings {
|
||||||
if err := ImportEmbeddings(ctx, s, opts, manifest); err != nil {
|
if err := ImportEmbeddings(ctx, s, opts, manifest); err != nil {
|
||||||
@ -273,6 +278,12 @@ func ImportIfChanged(ctx context.Context, s *store.Store, opts Options) (Manifes
|
|||||||
}
|
}
|
||||||
return manifest, false, nil
|
return manifest, false, nil
|
||||||
}
|
}
|
||||||
|
if previous, ok := PreviousImportedManifest(ctx, s, opts); ok {
|
||||||
|
imported, changed, err := ImportIncremental(ctx, s, opts, previous, manifest)
|
||||||
|
if err == nil || !errors.Is(err, errIncrementalUnsupported) {
|
||||||
|
return imported, changed, err
|
||||||
|
}
|
||||||
|
}
|
||||||
imported, err := Import(ctx, s, opts)
|
imported, err := Import(ctx, s, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Manifest{}, false, err
|
return Manifest{}, false, err
|
||||||
@ -280,6 +291,81 @@ func ImportIfChanged(ctx context.Context, s *store.Store, opts Options) (Manifes
|
|||||||
return imported, true, nil
|
return imported, true, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var errIncrementalUnsupported = errors.New("incremental share import unsupported")
|
||||||
|
|
||||||
|
func ImportIncremental(ctx context.Context, s *store.Store, opts Options, previous, manifest Manifest) (Manifest, bool, error) {
|
||||||
|
plan := snapshot.PlanIncrementalImport(snapshotManifest(previous), snapshotManifest(manifest))
|
||||||
|
plan, supported := shareIncrementalPlan(plan)
|
||||||
|
if !supported {
|
||||||
|
return Manifest{}, false, errIncrementalUnsupported
|
||||||
|
}
|
||||||
|
if !plan.Changed() {
|
||||||
|
if err := MarkImported(ctx, s, manifest); err != nil {
|
||||||
|
return Manifest{}, false, err
|
||||||
|
}
|
||||||
|
return manifest, false, nil
|
||||||
|
}
|
||||||
|
opts.reportProgress(ImportProgress{Phase: "start", TotalRows: manifestRowCount(manifest)})
|
||||||
|
restorePragmas, err := applyImportPragmas(ctx, s.DB())
|
||||||
|
if err != nil {
|
||||||
|
return Manifest{}, false, err
|
||||||
|
}
|
||||||
|
pragmasRestored := false
|
||||||
|
defer func() {
|
||||||
|
if !pragmasRestored {
|
||||||
|
_ = restorePragmas(ctx)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
if _, _, err := snapshot.ImportIncremental(ctx, snapshot.IncrementalImportOptions{
|
||||||
|
DB: s.DB(),
|
||||||
|
RootDir: opts.RepoPath,
|
||||||
|
Current: snapshotManifest(manifest),
|
||||||
|
Plan: plan,
|
||||||
|
Progress: func(progress snapshot.ImportProgress) {
|
||||||
|
opts.reportProgress(ImportProgress{
|
||||||
|
Phase: progress.Phase,
|
||||||
|
Table: progress.Table,
|
||||||
|
File: progress.File,
|
||||||
|
FileIndex: progress.FileIndex,
|
||||||
|
FileCount: progress.FileCount,
|
||||||
|
Rows: progress.Rows,
|
||||||
|
TotalRows: progress.TotalRows,
|
||||||
|
})
|
||||||
|
},
|
||||||
|
Filter: func(table string, row map[string]any) (bool, error) {
|
||||||
|
return !isDirectMessageSnapshotRow(table, row), nil
|
||||||
|
},
|
||||||
|
DeleteTable: func(ctx context.Context, tx *sql.Tx, table string) error {
|
||||||
|
query, args := snapshotDeleteQuery(table)
|
||||||
|
if _, err := tx.ExecContext(ctx, query, args...); err != nil {
|
||||||
|
return fmt.Errorf("clear %s: %w", table, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
ImportRow: importIncrementalSnapshotRow,
|
||||||
|
AfterImport: func(ctx context.Context, tx *sql.Tx) error {
|
||||||
|
if err := repairImportedGuildIDs(ctx, tx); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if opts.IncludeEmbeddings {
|
||||||
|
return importEmbeddings(ctx, tx, opts, manifest.Embeddings)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}); err != nil {
|
||||||
|
return Manifest{}, false, err
|
||||||
|
}
|
||||||
|
if err := MarkImported(ctx, s, manifest); err != nil {
|
||||||
|
return Manifest{}, false, err
|
||||||
|
}
|
||||||
|
if err := restorePragmas(ctx); err != nil {
|
||||||
|
return Manifest{}, false, err
|
||||||
|
}
|
||||||
|
pragmasRestored = true
|
||||||
|
opts.reportProgress(ImportProgress{Phase: "done", TotalRows: manifestRowCount(manifest)})
|
||||||
|
return manifest, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (opts Options) reportProgress(progress ImportProgress) {
|
func (opts Options) reportProgress(progress ImportProgress) {
|
||||||
if opts.Progress != nil {
|
if opts.Progress != nil {
|
||||||
opts.Progress(progress)
|
opts.Progress(progress)
|
||||||
@ -340,7 +426,173 @@ func MarkImported(ctx context.Context, s *store.Store, manifest Manifest) error
|
|||||||
if manifest.GeneratedAt.IsZero() {
|
if manifest.GeneratedAt.IsZero() {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return s.SetSyncState(ctx, LastImportManifestSyncScope, manifest.GeneratedAt.Format(time.RFC3339Nano))
|
if err := s.SetSyncState(ctx, LastImportManifestSyncScope, manifest.GeneratedAt.Format(time.RFC3339Nano)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
body, err := json.Marshal(manifest)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("marshal imported manifest state: %w", err)
|
||||||
|
}
|
||||||
|
return s.SetSyncState(ctx, LastImportManifestJSONScope, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
func PreviousImportedManifest(ctx context.Context, s *store.Store, opts Options) (Manifest, bool) {
|
||||||
|
body, err := s.GetSyncState(ctx, LastImportManifestJSONScope)
|
||||||
|
if err == nil && strings.TrimSpace(body) != "" {
|
||||||
|
var manifest Manifest
|
||||||
|
if json.Unmarshal([]byte(body), &manifest) == nil && !manifest.GeneratedAt.IsZero() {
|
||||||
|
return manifest, true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
last, err := s.GetSyncState(ctx, LastImportManifestSyncScope)
|
||||||
|
if err != nil || strings.TrimSpace(last) == "" {
|
||||||
|
return Manifest{}, false
|
||||||
|
}
|
||||||
|
generatedAt, err := time.Parse(time.RFC3339Nano, last)
|
||||||
|
if err != nil {
|
||||||
|
return Manifest{}, false
|
||||||
|
}
|
||||||
|
manifest, err := manifestFromGitHistory(ctx, opts.RepoPath, generatedAt)
|
||||||
|
if err != nil {
|
||||||
|
return Manifest{}, false
|
||||||
|
}
|
||||||
|
return manifest, true
|
||||||
|
}
|
||||||
|
|
||||||
|
func manifestFromGitHistory(ctx context.Context, repoPath string, generatedAt time.Time) (Manifest, error) {
|
||||||
|
out, err := output(ctx, repoPath, "git", "log", "--format=%H", "--max-count=500", "--", ManifestName)
|
||||||
|
if err != nil {
|
||||||
|
return Manifest{}, err
|
||||||
|
}
|
||||||
|
for _, hash := range strings.Fields(out) {
|
||||||
|
body, err := output(ctx, repoPath, "git", "show", hash+":"+ManifestName)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
var manifest Manifest
|
||||||
|
if err := json.Unmarshal([]byte(body), &manifest); err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if manifest.GeneratedAt.Equal(generatedAt) {
|
||||||
|
return enrichManifestFromGit(ctx, repoPath, hash, manifest), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Manifest{}, fmt.Errorf("imported manifest %s not found in git history", generatedAt.Format(time.RFC3339Nano))
|
||||||
|
}
|
||||||
|
|
||||||
|
func enrichManifestFromGit(ctx context.Context, repoPath, rev string, manifest Manifest) Manifest {
|
||||||
|
if strings.TrimSpace(repoPath) == "" || manifestHasFileManifests(manifest) {
|
||||||
|
return manifest
|
||||||
|
}
|
||||||
|
files, err := gitTreeFiles(ctx, repoPath, rev)
|
||||||
|
if err != nil {
|
||||||
|
return manifest
|
||||||
|
}
|
||||||
|
for i := range manifest.Tables {
|
||||||
|
table := &manifest.Tables[i]
|
||||||
|
if len(table.FileManifests) > 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
paths := table.Files
|
||||||
|
if len(paths) == 0 && strings.TrimSpace(table.File) != "" {
|
||||||
|
paths = []string{table.File}
|
||||||
|
}
|
||||||
|
table.FileManifests = make([]snapshot.FileManifest, 0, len(paths))
|
||||||
|
for _, path := range paths {
|
||||||
|
info, ok := files[path]
|
||||||
|
if !ok {
|
||||||
|
table.FileManifests = nil
|
||||||
|
break
|
||||||
|
}
|
||||||
|
rows := 0
|
||||||
|
if len(paths) == 1 {
|
||||||
|
rows = table.Rows
|
||||||
|
}
|
||||||
|
table.FileManifests = append(table.FileManifests, snapshot.FileManifest{
|
||||||
|
Path: path,
|
||||||
|
Rows: rows,
|
||||||
|
Size: info.size,
|
||||||
|
SHA256: "git:" + info.object,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return manifest
|
||||||
|
}
|
||||||
|
|
||||||
|
func manifestHasFileManifests(manifest Manifest) bool {
|
||||||
|
for _, table := range manifest.Tables {
|
||||||
|
if (len(table.Files) > 0 || strings.TrimSpace(table.File) != "") && len(table.FileManifests) == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
type gitTreeFile struct {
|
||||||
|
object string
|
||||||
|
size int64
|
||||||
|
}
|
||||||
|
|
||||||
|
func gitTreeFiles(ctx context.Context, repoPath, rev string) (map[string]gitTreeFile, error) {
|
||||||
|
if strings.TrimSpace(rev) == "" {
|
||||||
|
rev = "HEAD"
|
||||||
|
}
|
||||||
|
out, err := output(ctx, repoPath, "git", "ls-tree", "-r", "-l", rev, "--", "tables")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
files := map[string]gitTreeFile{}
|
||||||
|
for _, line := range strings.Split(out, "\n") {
|
||||||
|
fields := strings.Fields(line)
|
||||||
|
if len(fields) < 5 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
size, _ := strconv.ParseInt(fields[3], 10, 64)
|
||||||
|
files[fields[4]] = gitTreeFile{object: fields[2], size: size}
|
||||||
|
}
|
||||||
|
return files, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func snapshotManifest(manifest Manifest) snapshot.Manifest {
|
||||||
|
return snapshot.Manifest{
|
||||||
|
Version: manifest.Version,
|
||||||
|
GeneratedAt: manifest.GeneratedAt,
|
||||||
|
Tables: manifest.Tables,
|
||||||
|
Files: manifest.Files,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func shareIncrementalPlan(plan snapshot.ImportPlan) (snapshot.ImportPlan, bool) {
|
||||||
|
if plan.Full {
|
||||||
|
return plan, false
|
||||||
|
}
|
||||||
|
out := snapshot.ImportPlan{Tables: make([]snapshot.TableImportPlan, 0, len(plan.Tables))}
|
||||||
|
for _, tablePlan := range plan.Tables {
|
||||||
|
switch tablePlan.Mode {
|
||||||
|
case snapshot.TableImportSkip:
|
||||||
|
out.Tables = append(out.Tables, tablePlan)
|
||||||
|
case snapshot.TableImportFiles:
|
||||||
|
switch tablePlan.Table.Name {
|
||||||
|
case "messages":
|
||||||
|
out.Tables = append(out.Tables, tablePlan)
|
||||||
|
case "sync_state":
|
||||||
|
tablePlan.Mode = snapshot.TableImportReplace
|
||||||
|
tablePlan.Files = nil
|
||||||
|
tablePlan.Reason = "replace sync_state to avoid stale cursors"
|
||||||
|
out.Tables = append(out.Tables, tablePlan)
|
||||||
|
default:
|
||||||
|
return plan, false
|
||||||
|
}
|
||||||
|
case snapshot.TableImportReplace:
|
||||||
|
if tablePlan.Table.Name != "sync_state" {
|
||||||
|
return plan, false
|
||||||
|
}
|
||||||
|
out.Tables = append(out.Tables, tablePlan)
|
||||||
|
default:
|
||||||
|
return plan, false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadManifest(repoPath string) (Manifest, error) {
|
func ReadManifest(repoPath string) (Manifest, error) {
|
||||||
@ -874,6 +1126,112 @@ func importValue(value any) any {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func importIncrementalSnapshotRow(ctx context.Context, tx *sql.Tx, table string, row map[string]any) error {
|
||||||
|
if table == "message_events" || table == "mention_events" {
|
||||||
|
delete(row, "event_id")
|
||||||
|
}
|
||||||
|
if err := insertOrReplaceSnapshotRow(ctx, tx, table, row); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if table != "messages" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
messageID := stringValue(row["id"])
|
||||||
|
if messageID == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return upsertMessageFTSRow(ctx, tx, messageID)
|
||||||
|
}
|
||||||
|
|
||||||
|
func insertOrReplaceSnapshotRow(ctx context.Context, tx *sql.Tx, table string, row map[string]any) error {
|
||||||
|
cols := make([]string, 0, len(row))
|
||||||
|
for col := range row {
|
||||||
|
cols = append(cols, col)
|
||||||
|
}
|
||||||
|
sort.Strings(cols)
|
||||||
|
quoted := make([]string, 0, len(cols))
|
||||||
|
placeholders := make([]string, 0, len(cols))
|
||||||
|
args := make([]any, 0, len(cols))
|
||||||
|
for _, col := range cols {
|
||||||
|
quoted = append(quoted, quoteIdent(col))
|
||||||
|
placeholders = append(placeholders, "?")
|
||||||
|
args = append(args, importValue(row[col]))
|
||||||
|
}
|
||||||
|
stmt := "insert or replace into " + quoteIdent(table) + "(" + strings.Join(quoted, ",") + ") values(" + strings.Join(placeholders, ",") + ")"
|
||||||
|
if _, err := tx.ExecContext(ctx, stmt, args...); err != nil {
|
||||||
|
return fmt.Errorf("insert %s: %w", table, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func upsertMessageFTSRow(ctx context.Context, tx *sql.Tx, messageID string) error {
|
||||||
|
rowID, ok := messageFTSRowID(messageID)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if _, err := tx.ExecContext(ctx, `delete from message_fts where rowid = ?`, rowID); err != nil {
|
||||||
|
return fmt.Errorf("delete message_fts %s: %w", messageID, err)
|
||||||
|
}
|
||||||
|
var (
|
||||||
|
guildID string
|
||||||
|
channelID string
|
||||||
|
authorID string
|
||||||
|
authorName string
|
||||||
|
channelName string
|
||||||
|
content string
|
||||||
|
)
|
||||||
|
if err := tx.QueryRowContext(ctx, `
|
||||||
|
select
|
||||||
|
m.guild_id,
|
||||||
|
m.channel_id,
|
||||||
|
coalesce(m.author_id, ''),
|
||||||
|
coalesce(
|
||||||
|
json_extract(m.raw_json, '$.member.nick'),
|
||||||
|
json_extract(m.raw_json, '$.author.global_name'),
|
||||||
|
json_extract(m.raw_json, '$.author.username'),
|
||||||
|
''
|
||||||
|
),
|
||||||
|
coalesce(c.name, ''),
|
||||||
|
m.normalized_content
|
||||||
|
from messages m
|
||||||
|
left join channels c on c.id = m.channel_id
|
||||||
|
where m.id = ?
|
||||||
|
`, messageID).Scan(&guildID, &channelID, &authorID, &authorName, &channelName, &content); err != nil {
|
||||||
|
return fmt.Errorf("query message_fts %s: %w", messageID, err)
|
||||||
|
}
|
||||||
|
if _, err := tx.ExecContext(ctx, `
|
||||||
|
insert into message_fts(rowid, message_id, guild_id, channel_id, author_id, author_name, channel_name, content)
|
||||||
|
values(?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
`, rowID, messageID, guildID, channelID, nullIfEmpty(authorID), authorName, channelName, content); err != nil {
|
||||||
|
return fmt.Errorf("insert message_fts %s: %w", messageID, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// messageFTSRowID derives the message_fts rowid for a message id.
//
// A positive base-10 integer id is used directly. Any other non-empty id is
// hashed with 64-bit FNV-1a and the sign bit is cleared so the result is
// non-negative; a zero result is bumped to 1. The boolean is false only for
// an empty id.
func messageFTSRowID(messageID string) (int64, bool) {
	if messageID == "" {
		return 0, false
	}
	if parsed, err := strconv.ParseInt(messageID, 10, 64); err == nil && parsed > 0 {
		return parsed, true
	}

	const signBit = uint64(1) << 63
	digest := fnv.New64a()
	_, _ = digest.Write([]byte(messageID)) // hash.Hash writes never fail
	if hashed := int64(digest.Sum64() &^ signBit); hashed != 0 {
		return hashed, true
	}
	return 1, true
}
|
||||||
|
|
||||||
|
// nullIfEmpty returns value unchanged when it is non-empty, and nil for the
// empty string (used above to bind an empty author id as SQL NULL).
func nullIfEmpty(value string) any {
	if len(value) > 0 {
		return value
	}
	return nil
}
|
||||||
|
|
||||||
func stringValue(value any) string {
|
func stringValue(value any) string {
|
||||||
switch v := value.(type) {
|
switch v := value.(type) {
|
||||||
case string:
|
case string:
|
||||||
|
|||||||
@ -14,6 +14,8 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/openclaw/crawlkit/mirror"
|
||||||
|
"github.com/openclaw/crawlkit/snapshot"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/openclaw/discrawl/internal/store"
|
"github.com/openclaw/discrawl/internal/store"
|
||||||
@ -73,6 +75,127 @@ func TestExportImportRoundTrip(t *testing.T) {
|
|||||||
require.Equal(t, manifest.GeneratedAt, imported.GeneratedAt)
|
require.Equal(t, manifest.GeneratedAt, imported.GeneratedAt)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestImportIfChangedUsesIncrementalTailImport(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
src := seedStore(t, filepath.Join(t.TempDir(), "src.db"))
|
||||||
|
defer func() { _ = src.Close() }()
|
||||||
|
|
||||||
|
repo := filepath.Join(t.TempDir(), "share")
|
||||||
|
manifest, err := Export(ctx, src, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotEmpty(t, tableEntry(t, manifest, "messages").FileManifests)
|
||||||
|
|
||||||
|
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = dst.Close() }()
|
||||||
|
_, changed, err := ImportIfChanged(ctx, dst, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.True(t, changed)
|
||||||
|
|
||||||
|
now := time.Now().UTC().Format(time.RFC3339Nano)
|
||||||
|
require.NoError(t, src.UpsertMessages(ctx, []store.MessageMutation{{
|
||||||
|
Record: store.MessageRecord{
|
||||||
|
ID: "m2",
|
||||||
|
GuildID: "g1",
|
||||||
|
ChannelID: "c1",
|
||||||
|
ChannelName: "general",
|
||||||
|
AuthorID: "u1",
|
||||||
|
AuthorName: "Peter",
|
||||||
|
MessageType: 0,
|
||||||
|
CreatedAt: now,
|
||||||
|
Content: "delta landed fast",
|
||||||
|
NormalizedContent: "delta landed fast",
|
||||||
|
RawJSON: `{"author":{"username":"Peter"}}`,
|
||||||
|
},
|
||||||
|
}}))
|
||||||
|
updated, err := Export(ctx, src, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotEqual(t, manifest.GeneratedAt, updated.GeneratedAt)
|
||||||
|
|
||||||
|
var progress []ImportProgress
|
||||||
|
imported, changed, err := ImportIfChanged(ctx, dst, Options{
|
||||||
|
RepoPath: repo,
|
||||||
|
Branch: "main",
|
||||||
|
Progress: func(p ImportProgress) { progress = append(progress, p) },
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.True(t, changed)
|
||||||
|
require.Equal(t, updated.GeneratedAt, imported.GeneratedAt)
|
||||||
|
require.Contains(t, progressPhases(progress), "table_start")
|
||||||
|
require.NotContains(t, progressPhases(progress), "rebuild_fts")
|
||||||
|
|
||||||
|
results, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "delta landed", Limit: 10})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, results, 1)
|
||||||
|
require.Equal(t, "m2", results[0].MessageID)
|
||||||
|
state, err := dst.GetSyncState(ctx, LastImportManifestJSONScope)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Contains(t, state, `"file_manifests"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestImportIfChangedInfersLegacyManifestFilesFromGit(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
src := seedStore(t, filepath.Join(t.TempDir(), "src.db"))
|
||||||
|
defer func() { _ = src.Close() }()
|
||||||
|
|
||||||
|
repo := filepath.Join(t.TempDir(), "share")
|
||||||
|
require.NoError(t, exec.CommandContext(ctx, "git", "init", repo).Run())
|
||||||
|
configureGitUser(t, repo)
|
||||||
|
manifest, err := Export(ctx, src, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
writeShareManifest(t, repo, stripFileManifests(manifest))
|
||||||
|
require.NoError(t, exec.CommandContext(ctx, "git", "-C", repo, "add", ".").Run())
|
||||||
|
require.NoError(t, exec.CommandContext(ctx, "git", "-C", repo, "commit", "-m", "initial snapshot").Run())
|
||||||
|
|
||||||
|
dst, err := store.Open(ctx, filepath.Join(t.TempDir(), "dst.db"))
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer func() { _ = dst.Close() }()
|
||||||
|
_, changed, err := ImportIfChanged(ctx, dst, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.True(t, changed)
|
||||||
|
|
||||||
|
now := time.Now().UTC().Format(time.RFC3339Nano)
|
||||||
|
require.NoError(t, src.UpsertMessages(ctx, []store.MessageMutation{{
|
||||||
|
Record: store.MessageRecord{
|
||||||
|
ID: "m2",
|
||||||
|
GuildID: "g1",
|
||||||
|
ChannelID: "c1",
|
||||||
|
ChannelName: "general",
|
||||||
|
AuthorID: "u1",
|
||||||
|
AuthorName: "Peter",
|
||||||
|
MessageType: 0,
|
||||||
|
CreatedAt: now,
|
||||||
|
Content: "legacy git delta",
|
||||||
|
NormalizedContent: "legacy git delta",
|
||||||
|
RawJSON: `{"author":{"username":"Peter"}}`,
|
||||||
|
},
|
||||||
|
}}))
|
||||||
|
updated, err := Export(ctx, src, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
writeShareManifest(t, repo, stripFileManifests(updated))
|
||||||
|
require.NoError(t, exec.CommandContext(ctx, "git", "-C", repo, "add", ".").Run())
|
||||||
|
require.NoError(t, exec.CommandContext(ctx, "git", "-C", repo, "commit", "-m", "tail snapshot").Run())
|
||||||
|
|
||||||
|
previous, ok := PreviousImportedManifest(ctx, dst, Options{RepoPath: repo, Branch: "main"})
|
||||||
|
require.True(t, ok)
|
||||||
|
planned, supported := shareIncrementalPlan(snapshot.PlanIncrementalImport(snapshotManifest(previous), snapshotManifest(enrichManifestFromGit(ctx, repo, "HEAD", stripFileManifests(updated)))))
|
||||||
|
require.True(t, supported, "%+v", planned)
|
||||||
|
require.True(t, planned.Changed(), "%+v", planned)
|
||||||
|
|
||||||
|
var progress []ImportProgress
|
||||||
|
_, changed, err = ImportIfChanged(ctx, dst, Options{
|
||||||
|
RepoPath: repo,
|
||||||
|
Branch: "main",
|
||||||
|
Progress: func(p ImportProgress) { progress = append(progress, p) },
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.True(t, changed)
|
||||||
|
require.NotContains(t, progressPhases(progress), "rebuild_fts")
|
||||||
|
results, err := dst.SearchMessages(ctx, store.SearchOptions{Query: "legacy git delta", Limit: 10})
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Len(t, results, 1)
|
||||||
|
}
|
||||||
|
|
||||||
func TestApplyImportPragmasKeepCrashRecoveryEnabled(t *testing.T) {
|
func TestApplyImportPragmasKeepCrashRecoveryEnabled(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
s := seedStore(t, filepath.Join(t.TempDir(), "dst.db"))
|
s := seedStore(t, filepath.Join(t.TempDir(), "dst.db"))
|
||||||
@ -652,6 +775,10 @@ func TestShareSmallHelpersAndValidation(t *testing.T) {
|
|||||||
require.Equal(t, `insert into "messages"("id","weird""column") values(?,?)`, insertSQL("messages", []string{"id", `weird"column`}))
|
require.Equal(t, `insert into "messages"("id","weird""column") values(?,?)`, insertSQL("messages", []string{"id", `weird"column`}))
|
||||||
require.Equal(t, "blob", exportValue([]byte("blob")))
|
require.Equal(t, "blob", exportValue([]byte("blob")))
|
||||||
require.Equal(t, "plain", exportValue("plain"))
|
require.Equal(t, "plain", exportValue("plain"))
|
||||||
|
require.Equal(t, int64(42), importValue(json.Number("42")))
|
||||||
|
require.Equal(t, 3.5, importValue(json.Number("3.5")))
|
||||||
|
require.Equal(t, "nope", importValue(json.Number("nope")))
|
||||||
|
require.Equal(t, "plain", importValue("plain"))
|
||||||
require.Equal(t, "plain", stringValue("plain"))
|
require.Equal(t, "plain", stringValue("plain"))
|
||||||
require.Equal(t, "42", stringValue(json.Number("42")))
|
require.Equal(t, "42", stringValue(json.Number("42")))
|
||||||
require.Empty(t, stringValue(42))
|
require.Empty(t, stringValue(42))
|
||||||
@ -662,6 +789,9 @@ func TestShareSmallHelpersAndValidation(t *testing.T) {
|
|||||||
query, args := snapshotExportQuery("messages")
|
query, args := snapshotExportQuery("messages")
|
||||||
require.Equal(t, "select * from messages where guild_id != ?", query)
|
require.Equal(t, "select * from messages where guild_id != ?", query)
|
||||||
require.Equal(t, []any{directMessageGuildID}, args)
|
require.Equal(t, []any{directMessageGuildID}, args)
|
||||||
|
query, args = snapshotExportQuery("guilds")
|
||||||
|
require.Equal(t, "select * from guilds where id != ?", query)
|
||||||
|
require.Equal(t, []any{directMessageGuildID}, args)
|
||||||
query, args = snapshotExportQuery("sync_state")
|
query, args = snapshotExportQuery("sync_state")
|
||||||
require.Equal(t, "select * from sync_state where scope not like 'wiretap:%'", query)
|
require.Equal(t, "select * from sync_state where scope not like 'wiretap:%'", query)
|
||||||
require.Nil(t, args)
|
require.Nil(t, args)
|
||||||
@ -672,9 +802,15 @@ func TestShareSmallHelpersAndValidation(t *testing.T) {
|
|||||||
query, args = snapshotDeleteQuery("channels")
|
query, args = snapshotDeleteQuery("channels")
|
||||||
require.Equal(t, "delete from channels where guild_id != ?", query)
|
require.Equal(t, "delete from channels where guild_id != ?", query)
|
||||||
require.Equal(t, []any{directMessageGuildID}, args)
|
require.Equal(t, []any{directMessageGuildID}, args)
|
||||||
|
query, args = snapshotDeleteQuery("guilds")
|
||||||
|
require.Equal(t, "delete from guilds where id != ?", query)
|
||||||
|
require.Equal(t, []any{directMessageGuildID}, args)
|
||||||
query, args = snapshotDeleteQuery("message_events")
|
query, args = snapshotDeleteQuery("message_events")
|
||||||
require.Equal(t, "delete from message_events where guild_id != ?", query)
|
require.Equal(t, "delete from message_events where guild_id != ?", query)
|
||||||
require.Equal(t, []any{directMessageGuildID}, args)
|
require.Equal(t, []any{directMessageGuildID}, args)
|
||||||
|
query, args = snapshotDeleteQuery("sync_state")
|
||||||
|
require.Equal(t, "delete from sync_state where scope not like 'wiretap:%'", query)
|
||||||
|
require.Nil(t, args)
|
||||||
query, args = snapshotDeleteQuery("custom")
|
query, args = snapshotDeleteQuery("custom")
|
||||||
require.Equal(t, "delete from custom", query)
|
require.Equal(t, "delete from custom", query)
|
||||||
require.Nil(t, args)
|
require.Nil(t, args)
|
||||||
@ -684,6 +820,20 @@ func TestShareSmallHelpersAndValidation(t *testing.T) {
|
|||||||
require.True(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "wiretap:last_import"}))
|
require.True(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "wiretap:last_import"}))
|
||||||
require.False(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "share:last_import"}))
|
require.False(t, isDirectMessageSnapshotRow("sync_state", map[string]any{"scope": "share:last_import"}))
|
||||||
require.False(t, isDirectMessageSnapshotRow("custom", map[string]any{"guild_id": directMessageGuildID}))
|
require.False(t, isDirectMessageSnapshotRow("custom", map[string]any{"guild_id": directMessageGuildID}))
|
||||||
|
require.True(t, isLocalOnlyGuildID(directMessageGuildID))
|
||||||
|
require.False(t, isLocalOnlyGuildID("g1"))
|
||||||
|
|
||||||
|
require.Equal(t, []string{"message_id", "guild_id"}, importColumns(TableManifest{Name: "message_events", Columns: []string{"event_id", "message_id", "guild_id"}}))
|
||||||
|
require.Equal(t, []string{"event_id", "message_id"}, importColumns(TableManifest{Name: "messages", Columns: []string{"event_id", "message_id"}}))
|
||||||
|
require.Equal(t, 7, manifestRowCount(Manifest{
|
||||||
|
Tables: []TableManifest{{Rows: 2}, {Rows: 3}},
|
||||||
|
Embeddings: []EmbeddingManifest{{Rows: 2}},
|
||||||
|
}))
|
||||||
|
var seen []ImportProgress
|
||||||
|
Options{Progress: func(progress ImportProgress) { seen = append(seen, progress) }}.reportProgress(ImportProgress{Phase: "phase"})
|
||||||
|
require.Equal(t, []ImportProgress{{Phase: "phase"}}, seen)
|
||||||
|
Options{}.reportProgress(ImportProgress{Phase: "ignored"})
|
||||||
|
require.Equal(t, mirror.Options{RepoPath: "repo", Remote: "origin", Branch: "main"}, mirrorOptions(Options{RepoPath: "repo", Remote: "origin", Branch: "main"}))
|
||||||
|
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
cw := &countingWriter{w: &buf}
|
cw := &countingWriter{w: &buf}
|
||||||
@ -853,6 +1003,13 @@ func writeShareManifest(t *testing.T, repo string, manifest Manifest) {
|
|||||||
require.NoError(t, os.WriteFile(filepath.Join(repo, ManifestName), append(body, '\n'), 0o600))
|
require.NoError(t, os.WriteFile(filepath.Join(repo, ManifestName), append(body, '\n'), 0o600))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func stripFileManifests(manifest Manifest) Manifest {
|
||||||
|
for i := range manifest.Tables {
|
||||||
|
manifest.Tables[i].FileManifests = nil
|
||||||
|
}
|
||||||
|
return manifest
|
||||||
|
}
|
||||||
|
|
||||||
func snapshotTableText(t *testing.T, repo string, table TableManifest) string {
|
func snapshotTableText(t *testing.T, repo string, table TableManifest) string {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
return snapshotFilesText(t, repo, table.Files)
|
return snapshotFilesText(t, repo, table.Files)
|
||||||
|
|||||||
@ -8,7 +8,7 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
crawlstore "github.com/vincentkoc/crawlkit/store"
|
crawlstore "github.com/openclaw/crawlkit/store"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|||||||
@ -7,7 +7,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/bwmarrin/discordgo"
|
"github.com/bwmarrin/discordgo"
|
||||||
"github.com/vincentkoc/crawlkit/progress"
|
"github.com/openclaw/crawlkit/progress"
|
||||||
|
|
||||||
"github.com/openclaw/discrawl/internal/store"
|
"github.com/openclaw/discrawl/internal/store"
|
||||||
)
|
)
|
||||||
@ -187,7 +187,7 @@ func (s *Syncer) syncMessageChannelsConcurrent(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Syncer) clearUnavailableChannel(ctx context.Context, channelID string) error {
|
func (s *Syncer) clearUnavailableChannel(ctx context.Context, channelID string) error {
|
||||||
if s.store == nil || channelID == "" {
|
if s == nil || s.store == nil || channelID == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return s.store.DeleteSyncState(ctx, "channel:"+channelID+":unavailable")
|
return s.store.DeleteSyncState(ctx, "channel:"+channelID+":unavailable")
|
||||||
@ -616,6 +616,9 @@ func (p *messageSyncProgress) record(channel *discordgo.Channel, count int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *messageSyncProgress) recordSkip(channel *discordgo.Channel, err error) {
|
func (p *messageSyncProgress) recordSkip(channel *discordgo.Channel, err error) {
|
||||||
|
if p == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
outcome := syncErrorOutcome(err)
|
outcome := syncErrorOutcome(err)
|
||||||
p.mu.Lock()
|
p.mu.Lock()
|
||||||
switch outcome {
|
switch outcome {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user