feat: add crawling tool plugins

Adds discrawl and wacrawl packages, plugin flakes, skills, updater wiring, and README entries.
This commit is contained in:
Dave Dennis 2026-04-25 21:16:51 -05:00 committed by GitHub
parent 3584e2b6d2
commit 26a27195ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 363 additions and 0 deletions

View File

@ -21,6 +21,8 @@ These tools are essential for a capable openclaw instance - screen capture, came
| Tool | What it does |
|------|--------------|
| [**summarize**](https://github.com/steipete/summarize) | Link → clean text → summary |
| [**discrawl**](https://github.com/steipete/discrawl) | Mirror Discord into SQLite and search history locally |
| [**wacrawl**](https://github.com/steipete/wacrawl) | Read-only local archive and search for WhatsApp Desktop data |
| [**gogcli**](https://github.com/steipete/gogcli) | Google CLI for Gmail, Calendar, Drive, and Contacts |
| [**goplaces**](https://github.com/steipete/goplaces) | Google Places API (New) CLI |
| [**camsnap**](https://github.com/steipete/camsnap) | Capture snapshots/clips from RTSP/ONVIF cameras |
@ -37,8 +39,10 @@ Each tool is a subflake under `tools/<tool>/` exporting `openclawPlugin`. Point
```nix
programs.openclaw.plugins = [
{ source = "github:openclaw/nix-steipete-tools?dir=tools/camsnap"; }
{ source = "github:openclaw/nix-steipete-tools?dir=tools/discrawl"; }
{ source = "github:openclaw/nix-steipete-tools?dir=tools/peekaboo"; }
{ source = "github:openclaw/nix-steipete-tools?dir=tools/summarize"; }
{ source = "github:openclaw/nix-steipete-tools?dir=tools/wacrawl"; }
];
```
@ -56,13 +60,17 @@ inputs.nix-steipete-tools.url = "github:openclaw/nix-steipete-tools";
# Then use:
inputs.nix-steipete-tools.packages.aarch64-darwin.camsnap
inputs.nix-steipete-tools.packages.aarch64-darwin.discrawl
inputs.nix-steipete-tools.packages.aarch64-darwin.peekaboo
inputs.nix-steipete-tools.packages.aarch64-darwin.wacrawl
# etc.
# Linux examples:
inputs.nix-steipete-tools.packages.x86_64-linux.camsnap
inputs.nix-steipete-tools.packages.x86_64-linux.discrawl
inputs.nix-steipete-tools.packages.aarch64-linux.gogcli
inputs.nix-steipete-tools.packages.x86_64-linux.summarize
inputs.nix-steipete-tools.packages.x86_64-linux.wacrawl
```
## Skills syncing

View File

@ -60,6 +60,8 @@ func main() {
mappings := []Mapping{
{"summarize", "skills/summarize"},
{"discrawl", "skills/discrawl"},
{"wacrawl", "skills/wacrawl"},
{"gogcli", "skills/gog"},
{"camsnap", "skills/camsnap"},
{"sonoscli", "skills/sonoscli"},

View File

@ -147,6 +147,26 @@ func main() {
}
tools := []Tool{
{
Name: "discrawl",
Repo: "steipete/discrawl",
Assets: []AssetSpec{
{System: "aarch64-darwin", Regex: regexp.MustCompile(`discrawl_[0-9.]+_darwin_arm64\.tar\.gz`)},
{System: "x86_64-linux", Regex: regexp.MustCompile(`discrawl_[0-9.]+_linux_amd64\.tar\.gz`)},
{System: "aarch64-linux", Regex: regexp.MustCompile(`discrawl_[0-9.]+_linux_arm64\.tar\.gz`)},
},
NixFile: filepath.Join(repoRoot, "nix", "pkgs", "discrawl.nix"),
},
{
Name: "wacrawl",
Repo: "steipete/wacrawl",
Assets: []AssetSpec{
{System: "aarch64-darwin", Regex: regexp.MustCompile(`wacrawl_[0-9.]+_darwin_arm64\.tar\.gz`)},
{System: "x86_64-linux", Regex: regexp.MustCompile(`wacrawl_[0-9.]+_linux_amd64\.tar\.gz`)},
{System: "aarch64-linux", Regex: regexp.MustCompile(`wacrawl_[0-9.]+_linux_arm64\.tar\.gz`)},
},
NixFile: filepath.Join(repoRoot, "nix", "pkgs", "wacrawl.nix"),
},
{
Name: "gogcli",
Repo: "steipete/gogcli",

View File

@ -12,6 +12,8 @@
forAllSystems = f: lib.genAttrs systems (system: f system);
packageSystems = {
summarize = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
discrawl = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
wacrawl = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
gogcli = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
goplaces = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
camsnap = [ "aarch64-darwin" "x86_64-linux" "aarch64-linux" ];
@ -34,6 +36,12 @@
nodejs = if pkgs ? nodejs_22 then pkgs.nodejs_22 else pkgs.nodejs;
};
})
// (lib.optionalAttrs (supports "discrawl") {
discrawl = pkgs.callPackage ./nix/pkgs/discrawl.nix {};
})
// (lib.optionalAttrs (supports "wacrawl") {
wacrawl = pkgs.callPackage ./nix/pkgs/wacrawl.nix {};
})
// (lib.optionalAttrs (supports "gogcli") {
gogcli = pkgs.callPackage ./nix/pkgs/gogcli.nix {};
})

53
nix/pkgs/discrawl.nix Normal file
View File

@ -0,0 +1,53 @@
# Packages the prebuilt discrawl release binary published on GitHub.
# Nothing is compiled: the tarball matching the host system is fetched,
# unpacked, and the executable is installed into $out/bin.
{ lib, stdenv, fetchurl }:

let
  # Release tarball URL and hash, keyed by Nix system string.
  sources = {
    "aarch64-darwin" = {
      url = "https://github.com/steipete/discrawl/releases/download/v0.6.0/discrawl_0.6.0_darwin_arm64.tar.gz";
      hash = "sha256-VAGD2mmbZP3pxBgtzg9n3DE89NDLKD5iUIVMbJYwaOo=";
    };
    "x86_64-linux" = {
      url = "https://github.com/steipete/discrawl/releases/download/v0.6.0/discrawl_0.6.0_linux_amd64.tar.gz";
      hash = "sha256-KQlpw/I1asBK4+3IlbCiO76+ysJIMVSSuwqrr9UG190=";
    };
    "aarch64-linux" = {
      url = "https://github.com/steipete/discrawl/releases/download/v0.6.0/discrawl_0.6.0_linux_arm64.tar.gz";
      hash = "sha256-2uRQonrM4THiSyNUir9bgOyaTWHbPv4brIPCNW4yU4s=";
    };
  };

  system = stdenv.hostPlatform.system;
in
stdenv.mkDerivation {
  pname = "discrawl";
  version = "0.6.0";

  # Fail with a readable message instead of a bare missing-attribute error
  # when evaluated for a system that has no entry in `sources`.
  src = fetchurl (sources.${system} or (throw "discrawl: unsupported system: ${system}"));

  dontConfigure = true;
  dontBuild = true;

  # Unpack straight into the build directory; no sourceRoot is set.
  unpackPhase = ''
    tar -xzf "$src"
  '';

  installPhase = ''
    runHook preInstall

    mkdir -p "$out/bin" "$out/share/doc/discrawl"

    # Locate the executable wherever the archive placed it. The result is
    # quoted so paths containing whitespace survive, and a missing binary
    # produces an explicit error instead of a confusing cp failure.
    bin="$(find . -type f -name discrawl -print -quit)"
    if [ -z "$bin" ]; then
      echo "discrawl: executable not found in release archive" >&2
      exit 1
    fi
    install -m 0755 "$bin" "$out/bin/discrawl"

    # Docs are optional; copy them only when present at the archive root.
    for doc in LICENSE README.md; do
      if [ -f "$doc" ]; then
        cp "$doc" "$out/share/doc/discrawl/"
      fi
    done

    runHook postInstall
  '';

  meta = {
    description = "Mirror Discord into SQLite and search server history locally";
    homepage = "https://github.com/steipete/discrawl";
    license = lib.licenses.mit;
    # The derivation installs an upstream-built executable, not source.
    sourceProvenance = [ lib.sourceTypes.binaryNativeCode ];
    # Supported platforms are exactly the systems that have a tarball above.
    platforms = builtins.attrNames sources;
    mainProgram = "discrawl";
  };
}

53
nix/pkgs/wacrawl.nix Normal file
View File

@ -0,0 +1,53 @@
# Packages the prebuilt wacrawl release binary published on GitHub.
# Nothing is compiled: the tarball matching the host system is fetched,
# unpacked, and the executable is installed into $out/bin.
{ lib, stdenv, fetchurl }:

let
  # Release tarball URL and hash, keyed by Nix system string.
  sources = {
    "aarch64-darwin" = {
      url = "https://github.com/steipete/wacrawl/releases/download/v0.1.0/wacrawl_0.1.0_darwin_arm64.tar.gz";
      hash = "sha256-GJw2RODtrn1uOdduq9lSYAWzXVkSQPMBuiweyCwov6g=";
    };
    "x86_64-linux" = {
      url = "https://github.com/steipete/wacrawl/releases/download/v0.1.0/wacrawl_0.1.0_linux_amd64.tar.gz";
      hash = "sha256-cMNmH/Xj++5dAIiphVz7e7DYDwM2xx+RlEVxBKw7kJw=";
    };
    "aarch64-linux" = {
      url = "https://github.com/steipete/wacrawl/releases/download/v0.1.0/wacrawl_0.1.0_linux_arm64.tar.gz";
      hash = "sha256-QybKaF1lmqGVDUwqBLWfEtJwkT/BB9KsJgL1a/m40hQ=";
    };
  };

  system = stdenv.hostPlatform.system;
in
stdenv.mkDerivation {
  pname = "wacrawl";
  version = "0.1.0";

  # Fail with a readable message instead of a bare missing-attribute error
  # when evaluated for a system that has no entry in `sources`.
  src = fetchurl (sources.${system} or (throw "wacrawl: unsupported system: ${system}"));

  dontConfigure = true;
  dontBuild = true;

  # Unpack straight into the build directory; no sourceRoot is set.
  unpackPhase = ''
    tar -xzf "$src"
  '';

  installPhase = ''
    runHook preInstall

    mkdir -p "$out/bin" "$out/share/doc/wacrawl"

    # Locate the executable wherever the archive placed it. The result is
    # quoted so paths containing whitespace survive, and a missing binary
    # produces an explicit error instead of a confusing cp failure.
    bin="$(find . -type f -name wacrawl -print -quit)"
    if [ -z "$bin" ]; then
      echo "wacrawl: executable not found in release archive" >&2
      exit 1
    fi
    install -m 0755 "$bin" "$out/bin/wacrawl"

    # Docs are optional; copy them only when present at the archive root.
    for doc in LICENSE README.md; do
      if [ -f "$doc" ]; then
        cp "$doc" "$out/share/doc/wacrawl/"
      fi
    done

    runHook postInstall
  '';

  meta = {
    description = "Read-only local archive and search for WhatsApp Desktop data";
    homepage = "https://github.com/steipete/wacrawl";
    license = lib.licenses.mit;
    # The derivation installs an upstream-built executable, not source.
    sourceProvenance = [ lib.sourceTypes.binaryNativeCode ];
    # Supported platforms are exactly the systems that have a tarball above.
    platforms = builtins.attrNames sources;
    mainProgram = "wacrawl";
  };
}

38
tools/discrawl/flake.nix Normal file
View File

@ -0,0 +1,38 @@
{
  description = "openclaw plugin: discrawl";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs?rev=16c7794d0a28b5a37904d55bcca36003b9109aaa&narHash=sha256-fFUnEYMla8b7UKjijLnMe%2BoVFOz6HjijGGNS1l7dYaQ%3D";
    root.url = "path:../..";
  };

  outputs = { self, nixpkgs, root }:
    let
      inherit (nixpkgs) lib;

      # The root flake's discrawl package for `system`, or null when the root
      # flake has no packages for that system or does not build discrawl there.
      packageFor = system: (root.packages.${system} or {}).discrawl or null;

      # Plugin description for `system`; null when discrawl is unavailable.
      describePlugin = system:
        let
          pkg = packageFor system;
        in
        if pkg != null then {
          name = "discrawl";
          skills = [ ./skills/discrawl ];
          packages = [ pkg ];
          needs = {
            stateDirs = [ ".discrawl" ];
            requiredEnv = [ ];
          };
        } else null;
    in {
      # Re-export discrawl for every system the root flake lists; systems
      # without a discrawl build get an empty package set.
      packages = lib.genAttrs (builtins.attrNames root.packages) (system:
        let
          pkg = packageFor system;
        in
        lib.optionalAttrs (pkg != null) { discrawl = pkg; }
      );

      openclawPlugin = describePlugin;
    };
}

View File

@ -0,0 +1,74 @@
---
name: discrawl
description: Mirror Discord guild history into local SQLite and query it offline with search, messages, mentions, reports, and DM wiretap import.
homepage: https://github.com/steipete/discrawl
metadata:
{
"openclaw":
{
"emoji": "🛰️",
"requires": { "bins": ["discrawl"] },
"install":
[
{
"id": "brew",
"kind": "brew",
"formula": "steipete/tap/discrawl",
"bins": ["discrawl"],
"label": "Install discrawl (brew)",
},
],
},
}
---
# discrawl
Use `discrawl` to mirror Discord guild data into local SQLite, then query it offline.
## When to Use
Use this skill when the user wants to:
- search Discord history locally without relying on Discord search
- archive a guild into SQLite for later queries
- inspect recent messages, mentions, channels, or members from a local archive
- import local Discord Desktop cache data for DM recovery/search
- publish or subscribe to a Git-backed Discord archive snapshot
## Requirements
- Discord bot token for guild sync, or an existing OpenClaw Discord config
- local Discord Desktop cache files (needed only when using `wiretap`)
- enough local disk for SQLite archive growth
## Setup
- Default config: `~/.discrawl/config.toml`
- Default database: `~/.discrawl/discrawl.db`
- Fastest setup when OpenClaw already has Discord configured:
- `discrawl init --from-openclaw ~/.openclaw/openclaw.json`
- Env-only setup:
- `export DISCORD_BOT_TOKEN="..."`
- `discrawl init`
## Common Commands
- Doctor: `discrawl doctor`
- Initial history: `discrawl sync --full`
- Incremental refresh: `discrawl sync`
- Live tail: `discrawl tail`
- Search: `discrawl search "panic nil pointer"`
- Recent channel messages: `discrawl messages --channel general --hours 24`
- Mentions: `discrawl mentions --user <user-id>`
- DM cache import: `discrawl wiretap`
- Local DM search: `discrawl dms --search "launch checklist"`
- Read-only SQL: `discrawl sql "select count(*) from messages"`
- Git-backed reader mode: `discrawl subscribe <private-repo-url>`
## Notes
- Bot-token sync reads only guilds/channels the bot can access.
- `wiretap` uses local Discord Desktop cache files only; it does not use a user token.
- Prefer `discrawl doctor` before a first sync.
- Use `sync --full` once for backfill, then plain `sync` for routine refreshes.

38
tools/wacrawl/flake.nix Normal file
View File

@ -0,0 +1,38 @@
{
  description = "openclaw plugin: wacrawl";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs?rev=16c7794d0a28b5a37904d55bcca36003b9109aaa&narHash=sha256-fFUnEYMla8b7UKjijLnMe%2BoVFOz6HjijGGNS1l7dYaQ%3D";
    root.url = "path:../..";
  };

  outputs = { self, nixpkgs, root }:
    let
      inherit (nixpkgs) lib;

      # The root flake's wacrawl package for `system`, or null when the root
      # flake has no packages for that system or does not build wacrawl there.
      packageFor = system: (root.packages.${system} or {}).wacrawl or null;

      # Plugin description for `system`; null when wacrawl is unavailable.
      describePlugin = system:
        let
          pkg = packageFor system;
        in
        if pkg != null then {
          name = "wacrawl";
          skills = [ ./skills/wacrawl ];
          packages = [ pkg ];
          needs = {
            stateDirs = [ ".wacrawl" ];
            requiredEnv = [ ];
          };
        } else null;
    in {
      # Re-export wacrawl for every system the root flake lists; systems
      # without a wacrawl build get an empty package set.
      packages = lib.genAttrs (builtins.attrNames root.packages) (system:
        let
          pkg = packageFor system;
        in
        lib.optionalAttrs (pkg != null) { wacrawl = pkg; }
      );

      openclawPlugin = describePlugin;
    };
}

View File

@ -0,0 +1,69 @@
---
name: wacrawl
description: Read-only local archive and search for WhatsApp Desktop chats, messages, and media metadata.
homepage: https://github.com/steipete/wacrawl
metadata:
{
"openclaw":
{
"emoji": "💬",
"requires": { "bins": ["wacrawl"] },
"install":
[
{
"id": "brew",
"kind": "brew",
"formula": "steipete/tap/wacrawl",
"bins": ["wacrawl"],
"label": "Install wacrawl (brew)",
},
],
},
}
---
# wacrawl
Use `wacrawl` to snapshot local WhatsApp Desktop data into a separate SQLite archive and search it offline.
## When to Use
Use this skill when the user wants to:
- inspect local WhatsApp Desktop history without opening the app
- archive chats into a local SQLite database for repeat queries
- search WhatsApp messages locally with filters
- list chats, recent messages, or archive status from a read-only import
## Requirements
- local WhatsApp Desktop data on the same machine
- enough local disk for `~/.wacrawl/wacrawl.db`
- an understanding that `wacrawl` only inspects data read-only; it cannot send messages
## Setup
- Default source (macOS): `~/Library/Group Containers/group.net.whatsapp.WhatsApp.shared`
- Default archive DB: `~/.wacrawl/wacrawl.db`
- First sanity check:
- `wacrawl doctor`
- First import:
- `wacrawl import`
## Common Commands
- Doctor: `wacrawl doctor`
- Import fresh snapshot: `wacrawl import`
- Archive status: `wacrawl status`
- List chats: `wacrawl chats --limit 20`
- Recent messages: `wacrawl messages --limit 20`
- One chat: `wacrawl messages --chat 1234567890@s.whatsapp.net --limit 50`
- Search: `wacrawl search "release notes"`
- Filtered search: `wacrawl --json search "invoice" --from-them --after 2026-01-01`
## Notes
- `wacrawl` is read-only and does not send messages.
- It copies WhatsApp SQLite files into a temp snapshot before import.
- Use `--source` to override the WhatsApp Desktop container path.
- Use `--db` to archive somewhere other than `~/.wacrawl/wacrawl.db`.