package qmd for OpenClaw tools

What:
- add a source-built qmd package and plugin metadata
- add a no-model-download qmd smoke check
- teach update-tools to notice qmd releases and the new sonoscli asset names

Why:
- nix-openclaw needs QMD as an internal runtime battery on Darwin and Linux
- the maintainer automation should not require a separate manual qmd bump path

Tests:
- nix build .#qmd .#checks.aarch64-darwin.qmd-smoke --accept-flake-config --no-link
- nix shell nixpkgs#go --command go test ./...
This commit is contained in:
joshp123 2026-05-06 09:31:57 +02:00
parent 1732fb7a5c
commit a0e7ac5ef1
8 changed files with 498 additions and 2 deletions

View File

@ -44,6 +44,7 @@ Homebrew configuration, not in these pure Nix package/module definitions.
| [**poltergeist**](https://github.com/steipete/poltergeist) | Universal file watcher with auto-rebuild | | [**poltergeist**](https://github.com/steipete/poltergeist) | Universal file watcher with auto-rebuild |
| [**sag**](https://github.com/steipete/sag) | Command-line ElevenLabs TTS with mac-style flags | | [**sag**](https://github.com/steipete/sag) | Command-line ElevenLabs TTS with mac-style flags |
| [**imsg**](https://github.com/openclaw/imsg) | iMessage/SMS CLI | | [**imsg**](https://github.com/openclaw/imsg) | iMessage/SMS CLI |
| [**qmd**](https://github.com/tobi/qmd) | On-device hybrid search for markdown knowledge bases |
## Usage (as openclaw plugins) ## Usage (as openclaw plugins)
@ -106,6 +107,10 @@ go run ./cmd/update-tools
Fetches latest release versions/URLs/hashes and updates the Nix expressions. Fetches latest release versions/URLs/hashes and updates the Nix expressions.
QMD is source-packaged because upstream does not publish release assets. Keep it
fresh through the same maintainer automation path, but do not make its smoke
check pull embedding/reranking models; model prewarming belongs in nix-openclaw.
## CI ## CI
| Workflow | Schedule | What it does | | Workflow | Schedule | What it does |

View File

@ -2,7 +2,9 @@ package main
import ( import (
"fmt" "fmt"
"io"
"log" "log"
"net/http"
"os" "os"
"path/filepath" "path/filepath"
"regexp" "regexp"
@ -68,6 +70,107 @@ func updateSourceBlock(path, system, url, hash string) error {
}) })
} }
// nixVersionAttrRe captures the value of the first `version = "...";`
// attribute in a Nix expression.
var nixVersionAttrRe = regexp.MustCompile(`version = "([^"]+)";`)

// readVersion reads the file at path and returns the value of the first
// `version = "...";` attribute found in it.
//
// It returns the os.ReadFile error unchanged when the file cannot be read,
// and a "version not found" error when no such attribute is present.
func readVersion(path string) (string, error) {
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", readErr
	}
	if groups := nixVersionAttrRe.FindSubmatch(contents); len(groups) >= 2 {
		return string(groups[1]), nil
	}
	return "", fmt.Errorf("version not found in %s", path)
}
// fetchText performs an HTTP GET on url and returns the response body as a
// string.
//
// When the GH_TOKEN environment variable is non-empty it is sent as a Bearer
// token in the Authorization header. Any status outside the 2xx range is
// returned as an error containing the URL, the status line and a bounded
// excerpt of the response body.
//
// NOTE(review): http.DefaultClient has no timeout, so a stalled server blocks
// the updater indefinitely; consider a client with an explicit Timeout.
func fetchText(url string) (string, error) {
	// Upper bound on how much of a failing response is echoed into the error,
	// so a large HTML error page cannot flood the log output.
	const maxErrorBody = 4 << 10

	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	if token := os.Getenv("GH_TOKEN"); token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Best effort: the excerpt is diagnostic only, so a read failure here
		// must not mask the status error that is about to be returned.
		excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return "", fmt.Errorf("fetch %s: %s: %s", url, resp.Status, excerpt)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("fetch %s: read body: %w", url, err)
	}
	return string(body), nil
}
// qmdNodeModulesHash pulls the hash assigned to system out of the text of
// qmd's upstream flake.nix.
//
// It looks for an assignment of the form `system = "<hash>";`, where the key
// may or may not be double-quoted, and returns the first match. An error is
// returned when no such assignment exists, or when the value contains a
// placeholder marker (a run of eight 'A's or the word "fake").
func qmdNodeModulesHash(upstreamFlake, system string) (string, error) {
	pattern := `"?` + regexp.QuoteMeta(system) + `"?\s*=\s*"([^"]+)";`
	groups := regexp.MustCompile(pattern).FindStringSubmatch(upstreamFlake)
	if len(groups) < 2 {
		return "", fmt.Errorf("qmd nodeModules hash for %s not found upstream", system)
	}

	candidate := groups[1]
	for _, marker := range []string{"AAAAAAAA", "fake"} {
		if strings.Contains(candidate, marker) {
			return "", fmt.Errorf("qmd nodeModules hash for %s is not populated upstream", system)
		}
	}
	return candidate, nil
}
// updateQMD brings nix/pkgs/qmd.nix under repoRoot up to the latest tobi/qmd
// release.
//
// It compares the version recorded in qmd.nix with the latest release tag
// (leading "v" stripped) and returns nil without touching the file when they
// match. Otherwise it prefetches the source hash for tag "v<version>", reads
// the per-system node_modules hashes from the flake.nix published at that tag,
// and rewrites qmd.nix.
//
// All remote data is gathered before the first write. The file is then edited
// hashes first and version last: the version attribute is what the early
// return above compares against, so bumping it only after every hash has been
// replaced means a failed or interrupted run is retried next time instead of
// being mistaken for an up-to-date package with stale hashes.
func updateQMD(repoRoot string) error {
	log.Printf("[update-tools] qmd")
	qmdFile := filepath.Join(repoRoot, "nix", "pkgs", "qmd.nix")

	currentVersion, err := readVersion(qmdFile)
	if err != nil {
		return err
	}
	rel, err := internal.LatestRelease("tobi/qmd")
	if err != nil {
		return err
	}
	version := strings.TrimPrefix(rel.TagName, "v")
	if currentVersion == version {
		return nil
	}

	srcHash, err := internal.PrefetchGitHub("tobi", "qmd", "v"+version)
	if err != nil {
		return err
	}
	upstreamFlake, err := fetchText(fmt.Sprintf("https://raw.githubusercontent.com/tobi/qmd/v%s/flake.nix", version))
	if err != nil {
		return err
	}

	// A fixed slice (rather than ranging over the map) keeps the order of file
	// edits, and therefore which error surfaces first, deterministic.
	systems := []string{"aarch64-darwin", "x86_64-linux"}
	nodeHashes := make(map[string]string, len(systems))
	for _, system := range systems {
		hash, err := qmdNodeModulesHash(upstreamFlake, system)
		if err != nil {
			return err
		}
		nodeHashes[system] = hash
	}

	// Source hash: only the hash line inside the fetchFromGitHub block is
	// rewritten. The replacement is literal so the hash text is never
	// interpreted as a regexp template.
	srcRe := regexp.MustCompile(`(?s)src = fetchFromGitHub \{.*?hash = "sha256-[^"]+";`)
	hashRe := regexp.MustCompile(`hash = "sha256-[^"]+";`)
	if err := internal.ReplaceOnceFunc(qmdFile, srcRe, func(s string) string {
		return hashRe.ReplaceAllLiteralString(s, fmt.Sprintf(`hash = "%s";`, srcHash))
	}); err != nil {
		return err
	}

	// Per-system node_modules hashes, keyed by the quoted system name.
	for _, system := range systems {
		re := regexp.MustCompile(fmt.Sprintf(`"%s" = "sha256-[^"]+";`, regexp.QuoteMeta(system)))
		if err := internal.ReplaceOnce(qmdFile, re, fmt.Sprintf(`"%s" = "%s";`, system, nodeHashes[system])); err != nil {
			return err
		}
	}

	// Version last; see the function comment for why the order matters.
	return internal.ReplaceOnce(qmdFile, regexp.MustCompile(`version = "[^"]+";`), fmt.Sprintf(`version = "%s";`, version))
}
func updateSummarize(repoRoot string) error { func updateSummarize(repoRoot string) error {
log.Printf("[update-tools] summarize") log.Printf("[update-tools] summarize")
summarizeFile := filepath.Join(repoRoot, "nix", "pkgs", "summarize.nix") summarizeFile := filepath.Join(repoRoot, "nix", "pkgs", "summarize.nix")
@ -202,7 +305,7 @@ func main() {
Name: "sonoscli", Name: "sonoscli",
Repo: "steipete/sonoscli", Repo: "steipete/sonoscli",
Assets: []AssetSpec{ Assets: []AssetSpec{
{System: "aarch64-darwin", Regex: regexp.MustCompile(`sonoscli-macos-arm64\.tar\.gz`)}, {System: "aarch64-darwin", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_darwin_arm64\.tar\.gz`)},
{System: "x86_64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_amd64\.tar\.gz`)}, {System: "x86_64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_amd64\.tar\.gz`)},
{System: "aarch64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_arm64\.tar\.gz`)}, {System: "aarch64-linux", Regex: regexp.MustCompile(`sonoscli_[0-9.]+_linux_arm64\.tar\.gz`)},
}, },
@ -246,6 +349,9 @@ func main() {
if err := updateSummarize(repoRoot); err != nil { if err := updateSummarize(repoRoot); err != nil {
log.Fatalf("update summarize failed: %v", err) log.Fatalf("update summarize failed: %v", err)
} }
if err := updateQMD(repoRoot); err != nil {
log.Fatalf("update qmd failed: %v", err)
}
for _, tool := range tools { for _, tool := range tools {
if err := updateTool(tool); err != nil { if err := updateTool(tool); err != nil {
if tool.Optional { if tool.Optional {

View File

@ -0,0 +1,33 @@
package main
import "testing"
// TestQMDNodeModulesHash checks that qmdNodeModulesHash finds the hash for
// each requested system, for both unquoted and double-quoted attribute keys,
// and reports an error for a system that the flake text does not mention.
func TestQMDNodeModulesHash(t *testing.T) {
	unquoted := `
nodeModulesHashes = {
x86_64-linux = "sha256-linux";
aarch64-darwin = "sha256-darwin";
};
`
	quoted := `
nodeModulesHashes = {
"x86_64-linux" = "sha256-linux";
"aarch64-darwin" = "sha256-darwin";
};
`
	cases := []struct {
		name   string
		flake  string
		system string
		want   string
	}{
		{"unquoted darwin", unquoted, "aarch64-darwin", "sha256-darwin"},
		{"unquoted linux", unquoted, "x86_64-linux", "sha256-linux"},
		{"quoted darwin", quoted, "aarch64-darwin", "sha256-darwin"},
		{"quoted linux", quoted, "x86_64-linux", "sha256-linux"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := qmdNodeModulesHash(tc.flake, tc.system)
			if err != nil {
				t.Fatal(err)
			}
			if got != tc.want {
				t.Fatalf("got %q, want %q", got, tc.want)
			}
		})
	}

	// A system with no entry must be an error, not an empty hash.
	if got, err := qmdNodeModulesHash(unquoted, "aarch64-linux"); err == nil {
		t.Fatalf("expected error for missing system, got %q", got)
	}
}
// TestQMDNodeModulesHashRejectsFake checks that placeholder hashes are
// refused. Both markers that qmdNodeModulesHash looks for are covered: a run
// of 'A' characters and the word "fake".
func TestQMDNodeModulesHashRejectsFake(t *testing.T) {
	placeholders := []struct {
		name string
		hash string
	}{
		{"all-A placeholder", "sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="},
		{"fake marker", "sha256-fake"},
	}
	for _, tc := range placeholders {
		t.Run(tc.name, func(t *testing.T) {
			upstream := "\nnodeModulesHashes = {\naarch64-darwin = \"" + tc.hash + "\";\n};\n"
			if _, err := qmdNodeModulesHash(upstream, "aarch64-darwin"); err == nil {
				t.Fatalf("expected placeholder hash %q to be rejected", tc.hash)
			}
		})
	}
}

View File

@ -22,6 +22,7 @@
poltergeist = [ "aarch64-darwin" ]; poltergeist = [ "aarch64-darwin" ];
sag = [ "aarch64-darwin" "x86_64-linux" ]; sag = [ "aarch64-darwin" "x86_64-linux" ];
imsg = [ "aarch64-darwin" ]; imsg = [ "aarch64-darwin" ];
qmd = [ "aarch64-darwin" "x86_64-linux" ];
}; };
in { in {
packages = forAllSystems (system: packages = forAllSystems (system:
@ -66,8 +67,22 @@
// (lib.optionalAttrs (supports "imsg") { // (lib.optionalAttrs (supports "imsg") {
imsg = pkgs.callPackage ./nix/pkgs/imsg.nix {}; imsg = pkgs.callPackage ./nix/pkgs/imsg.nix {};
}) })
// (lib.optionalAttrs (supports "qmd") {
qmd = pkgs.callPackage ./nix/pkgs/qmd.nix {};
})
); );
checks = forAllSystems (system: self.packages.${system}); checks = forAllSystems (system:
let
pkgs = import nixpkgs { inherit system; };
packages = self.packages.${system};
in
packages
// (lib.optionalAttrs (packages ? qmd) {
qmd-smoke = pkgs.callPackage ./nix/checks/qmd-smoke.nix {
qmd = packages.qmd;
};
})
);
}; };
} }

25
nix/checks/qmd-smoke.nix Normal file
View File

@ -0,0 +1,25 @@
# Smoke check for the qmd package.
#
# Arguments:
#   runCommand - builder used to run the script below
#   qmd        - the qmd package under test, put on PATH via nativeBuildInputs
#
# The script points HOME and the XDG config/cache/data directories at fresh
# directories under $TMPDIR, writes a single markdown note, and then runs
# `qmd --help`, `collection list`, `collection add`, `update`, `search` and
# `status` against it. The search step additionally requires the JSON output
# to contain the word "packaging", which appears in the note. The check output
# ($out) is an empty marker file created only after every command succeeded.
#
# NOTE(review): only `qmd search` is used for querying; the bundled skill doc
# describes it as BM25-only, which is presumably what keeps this check from
# downloading models. Confirm before adding `qmd query` or `qmd embed` here.
{
runCommand,
qmd,
}:
runCommand "qmd-smoke" { nativeBuildInputs = [ qmd ]; } ''
set -eu
export HOME="$TMPDIR/home"
export XDG_CONFIG_HOME="$TMPDIR/config"
export XDG_CACHE_HOME="$TMPDIR/cache"
export XDG_DATA_HOME="$TMPDIR/data"
mkdir -p "$HOME" "$XDG_CONFIG_HOME" "$XDG_CACHE_HOME" "$XDG_DATA_HOME" "$TMPDIR/notes"
printf '%s\n\n%s\n' '# Smoke' 'qmd packaging smoke' > "$TMPDIR/notes/smoke.md"
qmd --help >/dev/null
qmd collection list >/dev/null
qmd collection add "$TMPDIR/notes" --name smoke
qmd update
qmd search packaging --json | grep -q packaging
qmd status >/dev/null
touch "$out"
''

121
nix/pkgs/qmd.nix Normal file
View File

@ -0,0 +1,121 @@
# Source build of qmd (https://github.com/tobi/qmd).
#
# The build has two stages:
#   1. `nodeModules` - a fixed-output derivation that runs `bun install` and
#      keeps only the resulting node_modules tree.
#   2. the main derivation - copies that tree next to the sources, rebuilds the
#      better-sqlite3 native module with node-gyp, and installs a wrapper that
#      runs src/cli/qmd.ts with bun.
{
lib,
stdenv,
stdenvNoCC,
fetchFromGitHub,
bun,
makeWrapper,
nodejs,
node-gyp,
python3,
sqlite,
darwin,
}:
let
pname = "qmd";
version = "2.1.0";
# Upstream release tag "v<version>".
src = fetchFromGitHub {
owner = "tobi";
repo = "qmd";
rev = "v${version}";
hash = "sha256-bqIVaNRTa8H5vrw3RwsD7QdtTa0xNvRuEVzlzE1hIBQ=";
};
# Expected hash of the node_modules tree, per system. The attribute names
# also define meta.platforms below, so adding a system here is what makes the
# package available on it.
nodeModulesHashes = {
"aarch64-darwin" = "sha256-qU+9KdR/nTocelyANS09I/4yaQ+7s1LvJNqB27IOK/c=";
"x86_64-linux" = "sha256-D0ezO4vqq4iswcAMU2DCql9ZAQvh3me6N9aDB5roq4w=";
};
system = stdenv.hostPlatform.system;
# Fixed-output derivation (outputHash* set below) holding the dependencies
# installed by bun from the lockfile.
nodeModules = stdenvNoCC.mkDerivation {
pname = "qmd-node-modules";
inherit version src;
# Pass proxy-related variables from the calling environment through to the
# build.
impureEnvVars = lib.fetchers.proxyImpureEnvVars ++ [
"GIT_PROXY_COMMAND"
"SOCKS_SERVER"
];
nativeBuildInputs = [ bun ];
dontConfigure = true;
# Production dependencies only, exactly as locked, without running package
# install scripts.
buildPhase = ''
runHook preBuild
export HOME="$(mktemp -d)"
bun install \
--backend copyfile \
--frozen-lockfile \
--ignore-scripts \
--no-progress \
--production
runHook postBuild
'';
installPhase = ''
runHook preInstall
mkdir -p "$out"
cp -R node_modules "$out/"
runHook postInstall
'';
# NOTE(review): presumably disabled so the fixup phase cannot alter the tree
# that outputHash is computed over - confirm.
dontFixup = true;
# Evaluation fails with a missing-attribute error on any system that has no
# entry in nodeModulesHashes.
outputHash = nodeModulesHashes.${system};
outputHashAlgo = "sha256";
outputHashMode = "recursive";
};
in
stdenv.mkDerivation {
inherit pname version src;
nativeBuildInputs = [
bun
makeWrapper
nodejs
node-gyp
python3
]
++ lib.optionals stdenv.hostPlatform.isDarwin [
darwin.cctools
];
buildInputs = [ sqlite ];
dontConfigure = true;
# Copy the prefetched dependencies into the build directory, make the copy
# writable, and compile better-sqlite3 in place. NOTE(review): the rebuild is
# presumably needed because nodeModules installs with --ignore-scripts.
buildPhase = ''
runHook preBuild
export HOME="$(mktemp -d)"
cp -R ${nodeModules}/node_modules ./
chmod -R u+w node_modules
(cd node_modules/better-sqlite3 && node-gyp rebuild --release)
runHook postBuild
'';
# Install sources and dependencies under lib/qmd and expose bin/qmd as a bun
# wrapper. The wrapper sets (overriding any inherited value) both
# DYLD_LIBRARY_PATH and LD_LIBRARY_PATH to the sqlite library directory.
installPhase = ''
runHook preInstall
mkdir -p "$out/bin" "$out/lib/qmd"
cp -r node_modules src package.json "$out/lib/qmd/"
makeWrapper ${bun}/bin/bun "$out/bin/qmd" \
--add-flags "$out/lib/qmd/src/cli/qmd.ts" \
--set DYLD_LIBRARY_PATH "${sqlite.out}/lib" \
--set LD_LIBRARY_PATH "${sqlite.out}/lib"
runHook postInstall
'';
meta = with lib; {
description = "On-device hybrid search for markdown knowledge bases";
homepage = "https://github.com/tobi/qmd";
license = licenses.mit;
# Supported platforms are exactly the systems with a node_modules hash.
platforms = builtins.attrNames nodeModulesHashes;
mainProgram = "qmd";
};
}

48
tools/qmd/flake.nix Normal file
View File

@ -0,0 +1,48 @@
# openclaw plugin flake for qmd.
#
# Re-exports the qmd package built by the repository root flake and describes
# it as an openclaw plugin (skill directory, package list, state directories).
{
  description = "openclaw plugin: qmd";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs?rev=16c7794d0a28b5a37904d55bcca36003b9109aaa&narHash=sha256-fFUnEYMla8b7UKjijLnMe%2BoVFOz6HjijGGNS1l7dYaQ%3D";
    root.url = "../..";
  };

  outputs =
    {
      self,
      nixpkgs,
      root,
    }:
    let
      inherit (nixpkgs) lib;

      # The qmd derivation the root flake exports for `system`, or null when the
      # root flake has no packages for that system or no qmd among them.
      qmdFor = system: (root.packages.${system} or { }).qmd or null;

      # Plugin description for `system`; null where qmd is unavailable.
      pluginFor =
        system:
        let
          qmd = qmdFor system;
        in
        if qmd != null then
          {
            name = "qmd";
            skills = [ ./skills/qmd ];
            packages = [ qmd ];
            needs = {
              stateDirs = [ ".local/share/qmd" ];
              requiredEnv = [ ];
            };
          }
        else
          null;
    in
    {
      # One attribute per system known to the root flake; the set is empty for
      # systems on which the root flake does not provide qmd.
      packages = lib.genAttrs (builtins.attrNames root.packages) (
        system:
        let
          qmd = qmdFor system;
        in
        lib.optionalAttrs (qmd != null) { inherit qmd; }
      );

      openclawPlugin = pluginFor;
    };
}

View File

@ -0,0 +1,143 @@
---
name: qmd
description: Search markdown knowledge bases, notes, and documentation using QMD. Use when users ask to search notes, find documents, or look up information.
license: MIT
compatibility: Requires qmd CLI or MCP server. In Nix OpenClaw, use the qmd package/plugin from nix-openclaw-tools.
metadata:
author: tobi
version: "2.0.0"
allowed-tools: Bash(qmd:*), mcp__qmd__*
---
# QMD - Quick Markdown Search
Local search engine for markdown content.
## Status
!`qmd status 2>/dev/null || echo "qmd CLI not found on this runtime PATH"`
## MCP: `query`
```json
{
"searches": [
{ "type": "lex", "query": "CAP theorem consistency" },
{ "type": "vec", "query": "tradeoff between consistency and availability" }
],
"collections": ["docs"],
"limit": 10
}
```
### Query Types
| Type | Method | Input |
|------|--------|-------|
| `lex` | BM25 | Keywords — exact terms, names, code |
| `vec` | Vector | Question — natural language |
| `hyde` | Vector | Answer — hypothetical result (50-100 words) |
### Writing Good Queries
**lex (keyword)**
- 2-5 terms, no filler words
- Exact phrase: `"connection pool"` (quoted)
- Exclude terms: `performance -sports` (minus prefix)
- Code identifiers work: `handleError async`
**vec (semantic)**
- Full natural language question
- Be specific: `"how does the rate limiter handle burst traffic"`
- Include context: `"in the payment service, how are refunds processed"`
**hyde (hypothetical document)**
- Write 50-100 words of what the *answer* looks like
- Use the vocabulary you expect in the result
**expand (auto-expand)**
- Use a single-line query (implicit) or `expand: question` on its own line
- Lets the local LLM generate lex/vec/hyde variations
- Do not mix `expand:` with other typed lines — it's either a standalone expand query or a full query document
### Intent (Disambiguation)
When a query term is ambiguous, add `intent` to steer results:
```json
{
"searches": [
{ "type": "lex", "query": "performance" }
],
"intent": "web page load times and Core Web Vitals"
}
```
Intent affects expansion, reranking, chunk selection, and snippet extraction. It does not search on its own — it's a steering signal that disambiguates queries like "performance" (web-perf vs team health vs fitness).
### Combining Types
| Goal | Approach |
|------|----------|
| Know exact terms | `lex` only |
| Don't know vocabulary | Use a single-line query (implicit `expand:`) or `vec` |
| Best recall | `lex` + `vec` |
| Complex topic | `lex` + `vec` + `hyde` |
| Ambiguous query | Add `intent` to any combination above |
First query gets 2x weight in fusion — put your best guess first.
### Lex Query Syntax
| Syntax | Meaning | Example |
|--------|---------|---------|
| `term` | Prefix match | `perf` matches "performance" |
| `"phrase"` | Exact phrase | `"rate limiter"` |
| `-term` | Exclude | `performance -sports` |
Note: `-term` only works in lex queries, not vec/hyde.
### Collection Filtering
```jsonc
{ "collections": ["docs"] } // Single
{ "collections": ["docs", "notes"] } // Multiple (OR)
```
Omit to search all collections.
## Other MCP Tools
| Tool | Use |
|------|-----|
| `get` | Retrieve doc by path or `#docid` |
| `multi_get` | Retrieve multiple by glob/list |
| `status` | Collections and health |
## CLI
```bash
qmd query "question" # Auto-expand + rerank
qmd query $'lex: X\nvec: Y' # Structured
qmd query $'expand: question' # Explicit expand
qmd query --json --explain "q" # Show score traces (RRF + rerank blend)
qmd search "keywords" # BM25 only (no LLM)
qmd get "#abc123" # By docid
qmd multi-get "journals/2026-*.md" -l 40 # Batch pull snippets by glob
qmd multi-get notes/foo.md,notes/bar.md # Comma-separated list, preserves order
```
## HTTP API
```bash
curl -X POST http://localhost:8181/query \
-H "Content-Type: application/json" \
-d '{"searches": [{"type": "lex", "query": "test"}]}'
```
## Setup
```bash
qmd collection add ~/notes --name notes
qmd embed
```